# Julia Schulte-Cloos 

# Replication Files: "The effect of EP Elections on Poliical Socialisation"

# forthc. Journal of European Public Policy (DOI:10.1080/13501763.2019.1620841)




# > xfun::session_info()
# R version 3.5.1 (2018-07-02)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 17763), RStudio 1.2.1335
# 
# Locale:
#   LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252    LC_MONETARY=German_Germany.1252
# LC_NUMERIC=C                    LC_TIME=German_Germany.1252    
# 
# Package version:
# assertthat_0.2.1   backports_1.1.4    BH_1.69.0.1        cli_1.1.0          colorspace_1.4-1  
# compiler_3.5.1     crayon_1.3.4       digest_0.6.19      dplyr_0.8.1        fansi_0.4.0       
# ggplot2_3.1.1      glue_1.3.1         graphics_3.5.1     grDevices_3.5.1    grid_3.5.1        
# gtable_0.3.0       labeling_0.3       lattice_0.20-35    lazyeval_0.2.2     magrittr_1.5      
# MASS_7.3.50        Matrix_1.2-14      methods_3.5.1      mgcv_1.8.24        munsell_0.5.0     
# nlme_3.1.137       pillar_1.4.0       pkgconfig_2.0.2    plogr_0.2.0        plyr_1.8.4        
# purrr_0.3.2        R6_2.4.0           RColorBrewer_1.1.2 Rcpp_1.0.1         reshape2_1.4.3    
# rlang_0.3.4        rstudioapi_0.10    scales_1.0.0       stats_3.5.1        stringi_1.4.3     
# stringr_1.4.0      tibble_2.1.1       tidyselect_0.2.5   tools_3.5.1        utf8_1.1.4        
# utils_3.5.1        vctrs_0.1.0        viridisLite_0.3.0  withr_2.1.2        xfun_0.7          
# zeallot_0.1.0    




## ----setup, include=FALSE------------------------------------------------

if (!require("pacman")) install.packages("pacman")

# pacman load packages

pacman::p_load(
# reproducibility
      knitr,
      here,
# data handling
      dplyr,
      tidyr,
      tidyverse,
      stringr,
      AER,# ivreg
      kableExtra, # complete table
# graphics
      ggplot2, # static graphics
# other
      viridis,
      magrittr,
      haven,
      Lmoments,
      optmatch,
      RItools,
      Matching,
      xtable,
      lmtest,
      readxl,
# layout-presentation
      extrafont,
      gridExtra,
      captioner,
      stargazer,
      estimatr, 
      remotes) 

remotes::install_github("acoppock/commarobust", ref = "legacy")
library(commarobust)


suppressPackageStartupMessages(library(tidyverse))


opts_chunk$set(tidy = TRUE,
	       echo = FALSE,
	       results = 'markup',
	       strip.white = TRUE,
	       fig.path = 'figs/fig',
	       cache = FALSE,
	       cache.extra = rand_seed, 
	       highlight = TRUE,
	       width.cutoff = 100,
	       size = 'footnotesize',
	       out.width = '.9\\textwidth',
	       message = FALSE,
	       warning = FALSE,
	       comment = NA)

options(width = 100,
	digits = 3)


# https://github.com/kolesarm/Robust-Small-Sample-Standard-Errors/blob/master/BM_StandardErrors.R
# function provided by Michal Kolesar: BM_StandardErrors.R
source("./BM_StandardErrors.R")


#runif(1,0, 10^8)
set.seed(64558030)






## ----read-csv-euyou------------------------------------------------------

euyou.data <- read_csv(file="./euyou.csv")
epsoc.data <- read_csv(file="./euyou.csv")



## ----epsoc-dataset, echo=FALSE, message=FALSE----------------------------

set.seed(64558030)

#subset the dataset as to include only respondents who have not yet experienced a national election
epsoc.data<- epsoc.data%>% dplyr::filter(count_eligNE==0 & eligdate!= as.Date("2004-06-15", format= '%Y-%m-%d'))

epsoc.data$eligtreat <- if_else(epsoc.data$eligdate < as.Date("2004-06-15", format= '%Y-%m-%d'), 1, 0)




#exclude austria (presidential elections April 25 2004) # including Austrian respondents that came of age AFTER the presidential election, but before the EP leave us only with 6 treated individuals.
epsoc.data <- epsoc.data %>% filter(country!=1)

#Presidential elections were held in Slovakia on 3 April 2004, exclude Slovakia as well. only 10 respondents in treatment left.
epsoc.data <- epsoc.data %>% filter(country!=7)

#bandwidth 9 months
lower_bound <- 213
upper_bound <- 231
epsoc.data = epsoc.data %>% filter(agemon >= lower_bound & agemon <= upper_bound)




## ----education-variables-------------------------------------------------

# recode the education variable as to include everyone who is still at school at the time of the interview as HIGHER EDUCATION

#   0 	still at school 	        
# 	1 	left school at 12 to 15 	
# 	2 	left school at 16 	 	
# 	3 	left school at 17 	 	  
# 	4 	left school at 18 	 	
# 	5 	left school at 19 	 	
# 	6 	left school at 20 to 25


epsoc.data$highedu <- if_else(epsoc.data$Q40_REC >=1 & epsoc.data$Q40_REC<=3, 0, 1)








## ----bandwidth-balance, message=FALSE, echo=FALSE, results="asis", include=FALSE----

set.seed(64558030)

epsoc9.data <- epsoc.data
#bandwidth 9 months
lower_bound <- 213
upper_bound <- 231
epsoc9.data = epsoc9.data %>% filter(agemon >= lower_bound & agemon <= upper_bound)


#the relevant pre-treatment covariates are
covs <- matrix(c('q48', 'Urbanisation',
                 'male', 'Gender',
                 'q45', 'Standard of Living',
                 'q47', 'Religiousness',
                 'HIDIPLO', 'Parents Higher Education',
                 'Q44B', 'Living with Parents',
                 'highedu', 'Education',
                 'votingp', 'Vote Habits of Parents',
                 'interestp', 'Pol. Interest of Parents',
                  'schoolengage', 'School Engagement'),
               ncol = 2,
               byrow = TRUE)

dimnames(covs) <- list(seq(from = 1,
                           to = 10,
                           by = 1),
                       c("Covariate", "Description"))



### outcome political interest ### 
col_vector <- c("country", "eligtreat", "q1", covs[,1], "ID") 
na_epsoc9_q1.data <- epsoc9.data %>% select_(.dots = col_vector)
na_epsoc9_q1.data <- na.omit(na_epsoc9_q1.data) 
names(na_epsoc9_q1.data) <- col_vector


### IPW epsoc9.data_q1 ###
blocks <- unique(na_epsoc9_q1.data$country[!is.na(na_epsoc9_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc9_q1.data$eligtreat[na_epsoc9_q1.data$eligtreat==1 & na_epsoc9_q1.data$country==x])/(length(na_epsoc9_q1.data$eligtreat[na_epsoc9_q1.data$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment 9 bandwidth", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc9_q1.data <- right_join(na_epsoc9_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc9_q1.data$ipw <- (1/na_epsoc9_q1.data$p_itblock)*na_epsoc9_q1.data$eligtreat +
  (1/(1-na_epsoc9_q1.data$p_itblock))*(1-na_epsoc9_q1.data$eligtreat)


#the relevant pre-treatment covariates are
covs <- matrix(c('q48', 'Urbanisation',
                 'male', 'Gender',
                 'q45', 'Standard of Living',
                 'q47', 'Religiousness',
                 'HIDIPLO', 'Parents Higher Education',
                 'Q44B', 'Living with Parents',
                 'highedu', 'Education',
                 'votingp', 'Vote Habits of Parents',
                 'interestp', 'Pol. Interest of Parents',
                 'schoolengage', 'School Engagement'),
               ncol = 2,
               byrow = TRUE)

dimnames(covs) <- list(seq(from = 1,
                           to = 10,
                           by = 1),
                       c("Covariate", "Description"))

kable(covs)



bal_fmla <- reformulate(covs[1:10], response = "eligtreat")

#add stratum name to ipw numeric vector
na_epsoc9_q1.data_ipwv = na_epsoc9_q1.data %>% group_by(country, ipw) %>%
                        dplyr::select(country, ipw) %>%
                        slice(1)
ipwv <- na_epsoc9_q1.data_ipwv$ipw
country <- c(na_epsoc9_q1.data_ipwv$country)
ipwv <- setNames(ipwv, c(country))

xbal_epsoc9.data_q1 <- xBalance(bal_fmla,
                           strata=list(country=~country),
                           stratum.weights=ipwv,
                           data=na_epsoc9_q1.data, report=c("std.diffs", "adj.means", "chisquare.test"))
xbal_epsoc9.data_q1.xtab <- xtable(xbal_epsoc9.data_q1)
rownames(xbal_epsoc9.data_q1.xtab) <- c('Urbanisation', 'Gender',
                                   'Standard of Living',
                                   'Religiousness',
                                   'Parents Higher Education',
                                   'Living with Parents',
                                   'Education',
                                   'Voting Habit of Parents', 'Political Interest of Parents',
                                   'School Engagement')



#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE B.4: Balance Statistics     ###
#<!-- ------------------------------------------------------------------------------------------ -->


print(xtable(xbal_epsoc9.data_q1.xtab, digits=2,
             caption="Balance Statistics (Weighted for Country-Specific Probability of Treatment Assignment)", align="lrrrl"),
      caption.placement="top", comment=F, file="./tableA4.txt", include.colnames = F)






epsoc8.data <- epsoc.data
#bandwidth 8 months
lower_bound <- 214
upper_bound <- 230
epsoc8.data = epsoc8.data %>% filter(agemon >= lower_bound & agemon <= upper_bound)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", covs[,1])
na_epsoc8_q1.data <- epsoc8.data %>% select_(.dots = col_vector)
na_epsoc8_q1.data <- na.omit(na_epsoc8_q1.data)
names(na_epsoc8_q1.data) <- col_vector


### IPW epsoc8.data_q1 ###
blocks <- unique(na_epsoc8_q1.data$country[!is.na(na_epsoc8_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc8_q1.data$eligtreat[na_epsoc8_q1.data$eligtreat==1 & na_epsoc8_q1.data$country==x])/(length(na_epsoc8_q1.data$eligtreat[na_epsoc8_q1.data$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment 8 bandwidth", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc8_q1.data <- right_join(na_epsoc8_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc8_q1.data$ipw <- (1/na_epsoc8_q1.data$p_itblock)*na_epsoc8_q1.data$eligtreat +
  (1/(1-na_epsoc8_q1.data$p_itblock))*(1-na_epsoc8_q1.data$eligtreat)





epsoc7.data <- epsoc.data
#bandwidth 7 months
lower_bound <- 215
upper_bound <- 229
epsoc7.data = epsoc7.data %>% filter(agemon >= lower_bound & agemon <= upper_bound)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", covs[,1])
na_epsoc7_q1.data <- epsoc7.data %>% select_(.dots = col_vector)
na_epsoc7_q1.data <- na.omit(na_epsoc7_q1.data)
names(na_epsoc7_q1.data) <- col_vector


### IPW epsoc7.data_q1 ###
blocks <- unique(na_epsoc7_q1.data$country[!is.na(na_epsoc7_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc7_q1.data$eligtreat[na_epsoc7_q1.data$eligtreat==1 & na_epsoc7_q1.data$country==x])/(length(na_epsoc7_q1.data$eligtreat[na_epsoc7_q1.data$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment 7 bandwidth", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc7_q1.data <- right_join(na_epsoc7_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc7_q1.data$ipw <- (1/na_epsoc7_q1.data$p_itblock)*na_epsoc7_q1.data$eligtreat +
  (1/(1-na_epsoc7_q1.data$p_itblock))*(1-na_epsoc7_q1.data$eligtreat)




epsoc6.data <- epsoc.data
#bandwidth 6 months
lower_bound <- 216
upper_bound <- 228
epsoc6.data = epsoc6.data  %>% filter(agemon >= lower_bound & agemon <= upper_bound)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", covs[,1])
na_epsoc6_q1.data <- epsoc6.data %>% select_(.dots = col_vector)
na_epsoc6_q1.data <- na.omit(na_epsoc6_q1.data)
names(na_epsoc6_q1.data) <- col_vector


### IPW epsoc6.data_q1 ###
blocks <- unique(na_epsoc6_q1.data$country[!is.na(na_epsoc6_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc6_q1.data$eligtreat[na_epsoc6_q1.data$eligtreat==1 & na_epsoc6_q1.data$country==x])/(length(na_epsoc6_q1.data$eligtreat[na_epsoc6_q1.data$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment 6 bandwidth", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc6_q1.data <- right_join(na_epsoc6_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc6_q1.data$ipw <- (1/na_epsoc6_q1.data$p_itblock)*na_epsoc6_q1.data$eligtreat +
  (1/(1-na_epsoc6_q1.data$p_itblock))*(1-na_epsoc6_q1.data$eligtreat)




epsoc5.data <- epsoc.data
#bandwidth 5 months
lower_bound <- 217
upper_bound <- 227
epsoc5.data = epsoc5.data %>% filter(agemon >= lower_bound & agemon <= upper_bound)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", covs[,1])
na_epsoc5_q1.data <- epsoc5.data %>% select_(.dots = col_vector)
na_epsoc5_q1.data <- na.omit(na_epsoc5_q1.data)
names(na_epsoc5_q1.data) <- col_vector


### IPW epsoc5.data_q1 ###
blocks <- unique(na_epsoc5_q1.data$country[!is.na(na_epsoc5_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc5_q1.data$eligtreat[na_epsoc5_q1.data$eligtreat==1 & na_epsoc5_q1.data$country==x])/(length(na_epsoc5_q1.data$eligtreat[na_epsoc5_q1.data$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment 5 bandwidth", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc5_q1.data <- right_join(na_epsoc5_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc5_q1.data$ipw <- (1/na_epsoc5_q1.data$p_itblock)*na_epsoc5_q1.data$eligtreat +
  (1/(1-na_epsoc5_q1.data$p_itblock))*(1-na_epsoc5_q1.data$eligtreat)



epsoc4.data <- epsoc.data
#bandwidth 4 months
lower_bound <- 218
upper_bound <- 226
epsoc4.data = epsoc4.data %>% filter(agemon >= lower_bound & agemon <= upper_bound)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", covs[,1])
na_epsoc4_q1.data <- epsoc4.data %>% select_(.dots = col_vector)
na_epsoc4_q1.data <- na.omit(na_epsoc4_q1.data)
names(na_epsoc4_q1.data) <- col_vector


### IPW epsoc4.data_q1 ###
blocks <- unique(na_epsoc4_q1.data$country[!is.na(na_epsoc4_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc4_q1.data$eligtreat[na_epsoc4_q1.data$eligtreat==1 & na_epsoc4_q1.data$country==x])/(length(na_epsoc4_q1.data$eligtreat[na_epsoc4_q1.data$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment 4 bandwidth", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc4_q1.data <- right_join(na_epsoc4_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc4_q1.data$ipw <- (1/na_epsoc4_q1.data$p_itblock)*na_epsoc4_q1.data$eligtreat +
  (1/(1-na_epsoc4_q1.data$p_itblock))*(1-na_epsoc4_q1.data$eligtreat)





## ----ipw-ep-inelig, echo=FALSE, warning=FALSE, results="asis", include=FALSE----

set.seed(64558030)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", covs[,1], "ID")
na_epsoc_q1.data <- epsoc.data %>% select_(.dots = col_vector)
na_epsoc_q1.data <- na.omit(na_epsoc_q1.data)
names(na_epsoc_q1.data) <- col_vector


# weighting to account for different probabilities of treatment within strata

#p_ij = P(D_ij = 1) vary by strata.
# w = 1/p_ij * D_i + 1/(1-p_ij)* (1-D_i)
# When treatment probabilities vary by block, then OLS will weight
# blocks by the variance of the treatment variable in each block. Without
# correcting for this, OLS will result in biased estimates of ATE

# If probability of treatment assignment varies across blocks, then weight
# treated units by probability of being in treatment and controls by the
# probability of being a control.


### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_q1.data$country[!is.na(na_epsoc_q1.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
    length(na_epsoc_q1.data$eligtreat[na_epsoc_q1.data$eligtreat==1 & na_epsoc_q1.data$country==x])/(length(na_epsoc_q1.data$eligtreat[na_epsoc_q1.data$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment Elig 2004", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc_q1.data <- right_join(na_epsoc_q1.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_q1.data$ipw <- (1/na_epsoc_q1.data$p_itblock)*na_epsoc_q1.data$eligtreat +
                    (1/(1-na_epsoc_q1.data$p_itblock))*(1-na_epsoc_q1.data$eligtreat)




#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment (Eligibility)")



## table appendix

#kable(na_epsoc_q1.data %>% group_by(country) %>% summarise(Ncountry = n()))



## ----balance-statistics-etov-table, echo=FALSE, warning=FALSE, results = "asis", include=FALSE----

set.seed(64558030)

#      BALANCE TABLE          

covs <- matrix(c('q48', 'Urbanisation',
                 'male', 'Gender',
                 'q45', 'Standard of Living',
                 'q47', 'Religiousness',
                 'HIDIPLO', 'Parents Higher Education',
                 'Q44B', 'Living with Parents',
                 'highedu', 'Education',
                 'votingp', 'Vote Habits of Parents',
                 'interestp', 'Pol. Interest of Parents',
                 'schoolengage','School Engagement'),
               ncol = 2,
               byrow = TRUE)

dimnames(covs) <- list(seq(from = 1,
                           to = 10,
                           by = 1),
                       c("Covariate", "Description"))
#kable(covs)
bal_fmla <- reformulate(covs[1:10], response = "eligtreat")


#add stratum name to ipw numeric vector
na_epsoc_q1_ipwv.data = na_epsoc_q1.data %>% group_by(country, ipw) %>%
  dplyr::select(country, ipw) %>%
  slice(1)
ipwv <- na_epsoc_q1_ipwv.data$ipw
country <- c(na_epsoc_q1_ipwv.data$country)
ipwv <- setNames(ipwv, c(country))

xbal_epsoc_q1 <- xBalance(bal_fmla,
                 strata=list(country=~country),
                 stratum.weights=ipwv,
                 data=na_epsoc_q1.data, report=c("adj.means", "adj.mean.diffs", "chisquare.test", "std.diffs",
                   "p.values"))

xbal_epsoc_q1.xtab <- xtable(xbal_epsoc_q1)
nrow(na_epsoc_q1.data)

rownames(xbal_epsoc_q1.xtab) <- c('Urbanisation', 'Gender',
               'Standard of Living',
                'Religiousness',
                'Parents? Higher Education',
                'Living with Parents',
                 'Education',
                 'Voting Habit of Parents', 'Political Interest of Parents',
                  'Civic Engagement in School')
xbal_epsoc_q1$overall


print(xtable(xbal_epsoc_q1.xtab, digits=2, caption="Balance Statistics",  align="lrrrrl"),  caption.placement="top", comment=F)


kable(xbal_epsoc_q1.xtab, digits= 2, caption= "Balance Statistics",
      col.names = c( "Control", "Treatment", "Diff. in Means", "Std. Diff", ""),
      booktabs = T)



## ----pol-int-estimates, echo=FALSE, warning=FALSE, message=FALSE---------

set.seed(64558030)


int_uni_epsoc <- lm(q1 ~ eligtreat + as.factor(country), data=na_epsoc_q1.data, weights = ipw)
BM_epsoc_int_uni <- BMlmSE(int_uni_epsoc, clustervar = as.factor(na_epsoc_q1.data$country))



int_uni_epsoc <- lm(q1 ~ eligtreat + as.factor(country), data=na_epsoc_q1.data, weights = ipw)
BM_epsoc_int_uni <- BMlmSE(int_uni_epsoc, clustervar = as.factor(na_epsoc_q1.data$country))


int_lm_epsoc <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=na_epsoc_q1.data, weights = ipw, clusters = country)
BM_epsoc_int <- BMlmSE(int_lm_epsoc, clustervar = as.factor(na_epsoc_q1.data$country))



permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_q1.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
#bivariate + countryFE
tmplm <- lm(q1 ~ eligtreat + as.factor(country), data=na_epsoc_q1.data, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_q1.data$eligtreat, y = na_epsoc_q1.data$q1))

## p-value blocks
p_two_sided_epsoc_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariates
tmplm <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=na_epsoc_q1.data, weights = ipw, clusters = country)
est_itt <- coef(tmplm)[["eligtreat"]]

#rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_q1.data$eligtreat, y = na_epsoc_q1.data$q1))



## p-value blocks
p_two_sided_epsoc <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


polint_randominfp <- c(round(p_two_sided_epsoc_uni, digits=3), round(p_two_sided_epsoc, digits=3), NULL, NULL)







## ----cace-polint, message=FALSE, echo=FALSE, warning=FALSE, results="asis", include=FALSE----

set.seed(64558030)

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1], "ID")
cace_epsoc9_q1.data <- epsoc9.data %>% select_(.dots = col_vector)
cace_epsoc9_q1.data$q11[cace_epsoc9_q1.data$eligtreat==0] <- 0

cace_epsoc9_q1.data <- na.omit(cace_epsoc9_q1.data)
names(cace_epsoc9_q1.data) <- col_vector
# add the ipw variable
cace_epsoc9_q1.data = left_join(cace_epsoc9_q1.data, na_epsoc_q1.data)


fit.2sls <- ivreg(q1 ~ q11 + as.factor(country)
                  | eligtreat + as.factor(country), data=cace_epsoc9_q1.data, weights = ipw)
est_fit.2sls <- commarobust_tidy(fit.2sls, clust_var = cace_epsoc9_q1.data$country)


fit.2sls_cov <- ivreg(q1 ~ q11 + highedu + Q44B + q45 + q47 + HIDIPLO +
                        male  +q48 + votingp + interestp + schoolengage + as.factor(country)
                    |  eligtreat + highedu + Q44B + q45 + q47 + HIDIPLO +
                        male  +q48 + votingp + interestp + schoolengage + as.factor(country),  data=cace_epsoc9_q1.data,  clusters=country, weights = ipw)
est_fit.2sls_cov <- commarobust_tidy(fit.2sls_cov, clust_var = cace_epsoc9_q1.data$country)


# first-stage of CACE sample also significant? YES.

firststage_uni_epsoc <- lm(q1 ~ eligtreat + as.factor(country), data=cace_epsoc9_q1.data, weights = ipw)

firststage_lm_epsoc <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=cace_epsoc9_q1.data, weights = ipw, clusters = country)





## ----pol-int-table, message=FALSE, results="asis"------------------------

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE 1: Effect of first-time EP eligibility and voting on interest in politics ###
#<!-- ------------------------------------------------------------------------------------------ -->


polint_randominfp <- c(round(p_two_sided_epsoc_uni, digits=2), round(p_two_sided_epsoc, digits=2), NULL, NULL)

stargazer(int_uni_epsoc,
          int_lm_epsoc,
          fit.2sls,
          fit.2sls_cov,
          se=list(BM_epsoc_int_uni$adj.se,
                  BM_epsoc_int$adj.se,
                  est_fit.2sls[,3],
                  est_fit.2sls_cov[,3]),
          p = list(NULL,
                  NULL,
                  est_fit.2sls[,5],
                  est_fit.2sls_cov[,5]),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          covariate.labels = c("Eligible", "Voting"),
          dep.var.caption=c("Dependent variable: political interest"),
          dep.var.labels.include = FALSE,
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_epsoc_uni, digits=2),
                             round(p_two_sided_epsoc, digits=2),
                             NULL,
                             NULL),
                           c("Age", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]"),
                           c("Method", "OLS", "OLS", "IV", "IV"),
                           c("Controls", "no", "yes", "no", "yes")),
          #, "Education", "Household with Parents", "Standard of Living", "Religious", "Higher Education Parents", "Gender", "Urban-Rural", "Voting Habits Parents", "Political Interest Parents", "Civic Engagement in School"),
          title=c("Effect of first-time EP eligibility and voting on interest in politics"),
          #order = c(1, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
          header=FALSE, table.placement = "tb",
          model.names = FALSE,
          notes.align = "l",
          notes.append = FALSE, 
          notes = c("\\parbox{0.95\\textwidth}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. Constant and country-fixed effects omitted from output. Bell-McCaffrey bias adjusted robust SE in parentheses. Inverse probability weights accounting for different probabilities of assignment to treatment and control conditions between country blocks. Entries of eligibility present ITT estimates, entries of voting CACE estimates.}"),
          label = c("tab:polint"),
          notes.label = "",
          font.size="small",
          single.row = TRUE,
          digits=2,
          column.sep.width = "0pt", 
          type = "text", out="./table1.txt"
                )



## ----lm-elig-partisan, warning=FALSE, echo=FALSE, results='asis', message=FALSE, include=FALSE----

set.seed(64558030)

## Effects on Partisan Preferences

#ppr parties  
colvector <- c("country", "eligtreat", "prop_ppr",
               "highedu", "Q44B", "q45", "q47",
               "HIDIPLO", "male", "q48", "votingp",
               "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_ppr <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_ppr$q11[na_epsoc_lm_ppr$eligtreat==0] <- 0

na_epsoc_lm_ppr <- na.omit(na_epsoc_lm_ppr)
names(na_epsoc_lm_ppr) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_ppr$country[!is.na(na_epsoc_lm_ppr$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
    length(na_epsoc_lm_ppr$eligtreat[na_epsoc_lm_ppr$eligtreat==1 & na_epsoc_lm_ppr$country==x])/(length(na_epsoc_lm_ppr$eligtreat[na_epsoc_lm_ppr$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")


na_epsoc_lm_ppr <- right_join(na_epsoc_lm_ppr, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_ppr$ipw <- (1/na_epsoc_lm_ppr$p_itblock)*na_epsoc_lm_ppr$eligtreat +
                    (1/(1-na_epsoc_lm_ppr$p_itblock))*(1-na_epsoc_lm_ppr$eligtreat)

epsoc.data_lm_ppr_uni <- lm(prop_ppr ~ eligtreat + as.factor(country),  
            data = na_epsoc_lm_ppr, weight=ipw, clusters= country)
BM_epsoc_ppr_uni <- BMlmSE(epsoc.data_lm_ppr_uni, clustervar = as.factor(na_epsoc_lm_ppr$country))

epsoc.data_lm_ppr <- lm(prop_ppr ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
            data = na_epsoc_lm_ppr, weight=ipw, clusters = country)
BM_epsoc_ppr <- BMlmSE(epsoc.data_lm_ppr, clustervar = as.factor(na_epsoc_lm_ppr$country))


#randomisation inference prop_ppr
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_ppr$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_ppr ~ eligtreat + as.factor(country), data=na_epsoc_lm_ppr, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_ppr$eligtreat, y = na_epsoc_lm_ppr$prop_ppr))

## p-value blocks
p_two_sided_prop_ppr_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_ppr ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_ppr, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_ppr$eligtreat, y = na_epsoc_lm_ppr$prop_ppr))

## p-value blocks
p_two_sided_prop_ppr <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))





## Effects on Partisan Preferences

#gre parties  
colvector <- c("country", "eligtreat", "prop_gre", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_gre <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_gre$q11[na_epsoc_lm_gre$eligtreat==0] <- 0

na_epsoc_lm_gre <- na.omit(na_epsoc_lm_gre)
names(na_epsoc_lm_gre) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_gre$country[!is.na(na_epsoc_lm_gre$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
    length(na_epsoc_lm_gre$eligtreat[na_epsoc_lm_gre$eligtreat==1 & na_epsoc_lm_gre$country==x])/(length(na_epsoc_lm_gre$eligtreat[na_epsoc_lm_gre$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")

na_epsoc_lm_gre <- right_join(na_epsoc_lm_gre, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_gre$ipw <- (1/na_epsoc_lm_gre$p_itblock)*na_epsoc_lm_gre$eligtreat +
                    (1/(1-na_epsoc_lm_gre$p_itblock))*(1-na_epsoc_lm_gre$eligtreat)


epsoc.data_lm_gre_uni <- lm(prop_gre ~ eligtreat + as.factor(country),  
          data = na_epsoc_lm_gre, weight=ipw, clusters= country)
BM_epsoc_gre_uni <- BMlmSE(epsoc.data_lm_gre_uni, clustervar = as.factor(na_epsoc_lm_gre$country))

epsoc.data_lm_gre <- lm(prop_gre ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
            data = na_epsoc_lm_gre, weight=ipw, clusters = country)
BM_epsoc_gre <- BMlmSE(epsoc.data_lm_gre, clustervar = as.factor(na_epsoc_lm_gre$country))


#randomisation inference prop_gre
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_gre$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_gre ~ eligtreat + as.factor(country), data=na_epsoc_lm_gre, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_gre$eligtreat, y = na_epsoc_lm_gre$prop_gre))

## p-value blocks
p_two_sided_prop_gre_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_gre ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_gre, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_gre$eligtreat, y = na_epsoc_lm_gre$prop_gre))

## p-value blocks
p_two_sided_prop_gre <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))






## Effects on Partisan Preferences

#rle parties  
colvector <- c("country", "eligtreat", "prop_rle", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_rle <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_rle$q11[na_epsoc_lm_rle$eligtreat==0] <- 0

na_epsoc_lm_rle <- na.omit(na_epsoc_lm_rle)
names(na_epsoc_lm_rle) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_rle$country[!is.na(na_epsoc_lm_rle$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
    length(na_epsoc_lm_rle$eligtreat[na_epsoc_lm_rle$eligtreat==1 & na_epsoc_lm_rle$country==x])/(length(na_epsoc_lm_rle$eligtreat[na_epsoc_lm_rle$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")

na_epsoc_lm_rle <- right_join(na_epsoc_lm_rle, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_rle$ipw <- (1/na_epsoc_lm_rle$p_itblock)*na_epsoc_lm_rle$eligtreat +
                    (1/(1-na_epsoc_lm_rle$p_itblock))*(1-na_epsoc_lm_rle$eligtreat)


epsoc.data_lm_rle_uni <- lm(prop_rle ~ eligtreat + as.factor(country),  
            data = na_epsoc_lm_rle, weight=ipw, clusters= country)
BM_epsoc_rle_uni <- BMlmSE(epsoc.data_lm_rle_uni, clustervar = as.factor(na_epsoc_lm_rle$country))

epsoc.data_lm_rle <- lm(prop_rle ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
            data = na_epsoc_lm_rle, weight=ipw, clusters = country)
BM_epsoc_rle <- BMlmSE(epsoc.data_lm_rle, clustervar = as.factor(na_epsoc_lm_rle$country))


#randomisation inference prop_rle
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_rle$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_rle ~ eligtreat + as.factor(country), data=na_epsoc_lm_rle, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_rle$eligtreat, y = na_epsoc_lm_rle$prop_rle))

## p-value blocks
p_two_sided_prop_rle_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_rle ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_rle, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_rle$eligtreat, y = na_epsoc_lm_rle$prop_rle))

## p-value blocks
p_two_sided_prop_rle <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))






## Effects on Partisan Preferences

#antieu parties  
colvector <- c("country", "eligtreat", "prop_antieu", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_antieu <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_antieu$q11[na_epsoc_lm_antieu$eligtreat==0] <- 0

na_epsoc_lm_antieu <- na.omit(na_epsoc_lm_antieu)
names(na_epsoc_lm_antieu) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_antieu$country[!is.na(na_epsoc_lm_antieu$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
    length(na_epsoc_lm_antieu$eligtreat[na_epsoc_lm_antieu$eligtreat==1 & na_epsoc_lm_antieu$country==x])/(length(na_epsoc_lm_antieu$eligtreat[na_epsoc_lm_antieu$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")

na_epsoc_lm_antieu <- right_join(na_epsoc_lm_antieu, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_antieu$ipw <- (1/na_epsoc_lm_antieu$p_itblock)*na_epsoc_lm_antieu$eligtreat +
                    (1/(1-na_epsoc_lm_antieu$p_itblock))*(1-na_epsoc_lm_antieu$eligtreat)


epsoc.data_lm_antieu_uni <- lm(prop_antieu ~ eligtreat + as.factor(country),  
            data = na_epsoc_lm_antieu, weight=ipw, clusters= country)
BM_epsoc_antieu_uni <- BMlmSE(epsoc.data_lm_antieu_uni, clustervar = as.factor(na_epsoc_lm_antieu$country))

epsoc.data_lm_antieu <- lm(prop_antieu ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
            data = na_epsoc_lm_antieu, weight=ipw, clusters = country)
BM_epsoc_antieu <- BMlmSE(epsoc.data_lm_antieu, clustervar = as.factor(na_epsoc_lm_antieu$country))


#randomisation inference prop_antieu
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_antieu$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_antieu ~ eligtreat + as.factor(country), data=na_epsoc_lm_antieu, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_antieu$eligtreat, y = na_epsoc_lm_antieu$prop_antieu))

## p-value blocks
p_two_sided_prop_antieu_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_antieu ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_antieu, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_antieu$eligtreat, y = na_epsoc_lm_antieu$prop_antieu))

## p-value blocks
p_two_sided_prop_antieu <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))




## ----itt-partisan, include=FALSE, results="asis", eval=FALSE-------------



stargazer(epsoc.data_lm_rle_uni,
          epsoc.data_lm_gre_uni,
          epsoc.data_lm_ppr_uni,
          epsoc.data_lm_antieu_uni,
          se=list(BM_epsoc_rle_uni$adj.se,
                  BM_epsoc_gre_uni$adj.se,
                  BM_epsoc_ppr_uni$adj.se,
                  BM_epsoc_antieu_uni$adj.se),
          single.row = TRUE,
          digits=2,
          omit.stat=c("f", "ser"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_prop_rle_uni, digits=2),
                             round(p_two_sided_prop_gre_uni, digits=2),
                             round(p_two_sided_prop_ppr_uni, digits=2),
                             round(p_two_sided_prop_antieu_uni, digits=2))),
          dep.var.caption=c("ITT"),
          dep.var.labels   = c("Rad. Left", "Green", "Pop.Right", "Anti-EU"),
          covariate.labels = c("Eligible"),
          title=c("Effect of eligibility in EP 2004 on closeness to challenger parties"),
          header=FALSE, table.placement = "tb", notes=NULL,
          font.size="small",
          label="tab:epsoc.prop_uni")



stargazer(epsoc.data_lm_rle,
          epsoc.data_lm_gre,
          epsoc.data_lm_ppr,
          epsoc.data_lm_antieu,
          se=list(BM_epsoc_rle$adj.se,
                  BM_epsoc_gre$adj.se,
                  BM_epsoc_ppr$adj.se,
                  BM_epsoc_antieu$adj.se),
          single.row = TRUE,
          digits=2,
          omit.stat=c("f", "ser"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_prop_rle, digits=2),
                             round(p_two_sided_prop_gre, digits=2),
                             round(p_two_sided_prop_ppr, digits=2),
                             round(p_two_sided_prop_antieu, digits=2))),
          dep.var.caption=c("ITT"),
          dep.var.labels   = c("Rad. Left", "Green", "Pop.Right", "Anti-EU"),
          covariate.labels = c("Eligible", "Education", "Household with P'", "Standard of Living",
                               "Religious", "Higher Edu. Parents", "Gender", "Urban-Rural", "Voting Habits Parents", "Pol. Interest Parents"),
          title=c("Effect of eligibility in EP 2004 on closeness to challenger parties"),
          header=FALSE, table.placement = "tb", notes=NULL,
          font.size="small",
          label="tab:epsoc.prop")




## ----cace-partisan, echo=FALSE, warning=FALSE, message=FALSE, results="asis"----


### cace outcome green partisan ties ###

cace_epsoc9_gre.data <- na_epsoc_lm_gre

fit.2sls_gre <- ivreg(prop_gre ~ q11 + as.factor(country)
                     | eligtreat + as.factor(country),
                     data=cace_epsoc9_gre.data, weights = ipw)
est_fit.2sls_gre <- commarobust_tidy(fit.2sls_gre, clust_var = cace_epsoc9_gre.data$country)



fit.2sls_gre_cov <- ivreg(prop_gre ~ q11 +  highedu + Q44B + q45 + q47 + HIDIPLO +
                            male  +q48 + votingp + interestp + schoolengage + as.factor(country)
                    | eligtreat + highedu + Q44B + q45 + q47 + HIDIPLO +
                  male + q48 + votingp + interestp + schoolengage + as.factor(country) , data=cace_epsoc9_gre.data, weights = ipw)
est_fit.2sls_gre_cov <- commarobust_tidy(fit.2sls_gre_cov, clust_var = cace_epsoc9_gre.data$country)





### cace outcome populist right partisan ties ###

cace_epsoc9_ppr.data <- na_epsoc_lm_ppr

fit.2sls_ppr <- ivreg(prop_ppr ~ q11 + as.factor(country)
                     | eligtreat + as.factor(country),
                     data=cace_epsoc9_ppr.data, weights = ipw)
est_fit.2sls_ppr <- commarobust_tidy(fit.2sls_ppr, clust_var = cace_epsoc9_ppr.data$country)


fit.2sls_ppr_cov <- ivreg(prop_ppr ~ q11  +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage +
                       as.factor(country)
                    | eligtreat  +
                    highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage +
                      as.factor(country),  data=cace_epsoc9_ppr.data, weights = ipw)
est_fit.2sls_ppr_cov <- commarobust_tidy(fit.2sls_ppr_cov, clust_var = cace_epsoc9_ppr.data$country)



### cace outcome radleft partisan ties ###

cace_epsoc9_rle.data <- na_epsoc_lm_rle


fit.2sls_rle <- ivreg(prop_rle ~ q11 + as.factor(country)
                     | eligtreat + as.factor(country),
                     data=cace_epsoc9_rle.data, weights = ipw)
est_fit.2sls_rle <- commarobust_tidy(fit.2sls_rle, clust_var = cace_epsoc9_rle.data$country)



fit.2sls_rle_cov <- ivreg(prop_rle ~ q11  +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage +
                       as.factor(country)
                    | eligtreat +
                    highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage +
                      as.factor(country) ,  data=cace_epsoc9_rle.data, weights = ipw)
est_fit.2sls_rle_cov <- commarobust_tidy(fit.2sls_rle_cov, clust_var = cace_epsoc9_rle.data$country)





### cace outcome anti-eu partisan ties ###

cace_epsoc9_antieu.data <- na_epsoc_lm_antieu

fit.2sls_antieu <- ivreg(prop_antieu ~ q11 + as.factor(country)
                     | eligtreat + as.factor(country),
                     data=cace_epsoc9_antieu.data, weights = ipw)
est_fit.2sls_antieu <- commarobust_tidy(fit.2sls_antieu, clust_var = cace_epsoc9_antieu.data$country)



fit.2sls_antieu_cov <- ivreg(prop_antieu ~ q11 +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage +
                     as.factor(country)
                    | eligtreat + as.factor(country) +
                    highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage +
                      as.factor(country),  data=cace_epsoc9_antieu.data, weights = ipw)
est_fit.2sls_antieu_cov <- commarobust_tidy(fit.2sls_antieu_cov, clust_var = cace_epsoc9_antieu.data$country)





## ----table-itt-cace-partisan, include=FALSE, results="asis"--------------

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE 2: Effect of first-time EP eligibility and voting on closeness to challenger parties ###
#<!-- ------------------------------------------------------------------------------------------ -->


stargazer(epsoc.data_lm_rle,
          epsoc.data_lm_gre,
          epsoc.data_lm_ppr,
          epsoc.data_lm_antieu,
          se=list(BM_epsoc_rle$adj.se,
                  BM_epsoc_gre$adj.se,
                  BM_epsoc_ppr$adj.se,
                  BM_epsoc_antieu$adj.se),
          single.row = TRUE,
          digits=2,
          omit.stat=c("f", "ser"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_prop_rle, digits=2),
                             round(p_two_sided_prop_gre, digits=2),
                             round(p_two_sided_prop_ppr, digits=2),
                             round(p_two_sided_prop_antieu, digits=2))),
          dep.var.caption=c("Dependent variable: closeness to challenger parties"),
          dep.var.labels   = c("Rad. Left", "Green", "Pop.Right", "Anti-EU"),
          covariate.labels = c("Eligible"
          #  "Education", "Household with P'",
          #                    "Standard of Living",
          #                    "Religious", "Higher Edu. Parents", "Gender",
          #                    "Urban-Rural", "Voting Habits Parents",
          #                    "Pol. Interest Parents"
            ),
           omit = c("Constant", "country", "highedu", "Q44B",
                  "q45", "q47", "HIDIPLO", "male", "q48",
                  "votingp", "interestp", "schoolengage"),
          title=c("Effect of first-time EP eligibilty and voting on closeness to challenger parties"),
          header=FALSE, table.placement = "tb", notes=NULL,
          font.size="small",
          label="tab:epsoc.prop", 
          type = "text", out="./table2_1.txt")

stargazer(fit.2sls_rle_cov,
          fit.2sls_gre_cov,
          fit.2sls_ppr_cov,
          fit.2sls_antieu_cov,
            se = list(est_fit.2sls_rle_cov[,3],
                  est_fit.2sls_gre_cov[,3],
                  est_fit.2sls_ppr_cov[,3],
                  est_fit.2sls_antieu_cov[,3]),
            p = list(est_fit.2sls_rle_cov[,5],
                  est_fit.2sls_gre_cov[,5],
                  est_fit.2sls_ppr_cov[,5],
                  est_fit.2sls_antieu_cov[,5]),
         omit = c("Constant", "country", "highedu", "Q44B",
                  "q45", "q47", "HIDIPLO", "male", "q48",
                  "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          dep.var.caption=c("Dependent variable: closeness to challenger parties"),
          dep.var.labels   = c("Rad. Left", "Green", "Rad. Right", "Anti-EU"),
          covariate.labels = c("Voting"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_prop_rle, digits=2),
                             round(p_two_sided_prop_gre, digits=2),
                             round(p_two_sided_prop_ppr, digits=2),
                             round(p_two_sided_prop_antieu, digits=2)),
                           c("Age", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]",
                             "[17.25-18.75]"),
                           c("Controls", "yes", "yes", "yes", "yes")),
         title=c("Effect of first-time EP eligibilty and voting on closeness to challenger parties"),
         header=FALSE, table.placement = "tb", notes=NULL,
          font.size="small",
         label = c("tab:ivpartisan"),
          single.row = TRUE,
          digits=2, 
         type = "text", out="./table2_2.txt")   





## ----tiny-4m-bd, warning=FALSE, message=FALSE, results="asis", include=FALSE----

set.seed(64558030)


int_lm_epsoc_tiny4 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=na_epsoc4_q1.data, weights = ipw)
BM_epsoc_tiny4_int <- BMlmSE(int_lm_epsoc_tiny4, clustervar = as.factor(na_epsoc4_q1.data$country))



## randomization inference
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc4_q1.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility 
tmplm <- lm(q1 ~ highedu + Q44B + q45 + q47 + HIDIPLO +
              male  +q48 + votingp + interestp + schoolengage  +
              as.factor(country) , data = na_epsoc4_q1.data, weights=ipw, clusters=country)
na_epsoc4_q1.data = na_epsoc4_q1.data %>% mutate(resid_int = resid(tmplm))
est_itt <- coef(lm(resid_int ~ eligtreat, data = na_epsoc4_q1.data))[["eligtreat"]]
rand_dist <- replicate(10^3, permute_block_sharp_null(z = na_epsoc4_q1.data$eligtreat, y = na_epsoc4_q1.data$resid_int))

## p-value
p_two_sided_epsoc_tiny4 <- 2* min(mean(rand_dist >= est_itt), mean(rand_dist <= est_itt))



### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1])
cace_epsoc4_q1.data <- epsoc4.data %>% select_(.dots = col_vector)
cace_epsoc4_q1.data$q11[cace_epsoc4_q1.data$eligtreat==0] <- 0

cace_epsoc4_q1.data <- na.omit(cace_epsoc4_q1.data)
names(cace_epsoc4_q1.data) <- col_vector
cace_epsoc4_q1.data = left_join(cace_epsoc4_q1.data, na_epsoc4_q1.data) ##




fit_tiny4bd.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_epsoc4_q1.data, weights = ipw)

est_fit_tiny4bd.2sls_cov <- commarobust_tidy(fit_tiny4bd.2sls_cov, clust_var = cace_epsoc4_q1.data$country)




## ----polint-longterm-itt, echo=FALSE, warning=FALSE, message=FALSE, results="asis", include=FALSE----


set.seed(64558030)

##effect of EP 1999 election

ofage99 <- euyou.data %>% filter(eligdate > as.Date("1998-09-01", format = '%Y-%m-%d') &
                              eligdate < as.Date("2000-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
ofage99 <- ofage99 %>% filter(eligdate != as.Date("1999-06-15", format = '%Y-%m-%d'))
ofage99$eligtreat <- if_else(ofage99$eligdate < as.Date("1999-06-15", format= '%Y-%m-%d'), 1, 0)
#exclude Slovakia and Estonia
ofage99 <- ofage99 %>% filter(country!=2 & country!=7)
#exclude Finnish people turned 18 before their national election in 1999 (1999-03-21)
ofage99$finneindicator <- if_else(ofage99$country==3 & ofage99$eligdate <= as.Date("1999-03-21", format = '%Y-%m-%d'),1,0 )
ofage99 <- ofage99 %>% filter(finneindicator!=1)
ofage99$finneindicator <- NULL

# generate identifier variable
ofage99 = ofage99 %>%
 mutate(ID99 = seq(1:nrow(ofage99)))


blocks <- unique(ofage99$country[!is.na(ofage99$country)])
itblock <- sapply(blocks, function(x) {
    length(ofage99$eligtreat[ofage99$eligtreat==1 & ofage99$country==x])/(length(ofage99$eligtreat[ofage99$country==x]))
  })

p_itblock <- sapply(blocks, function(x) {
    length(ofage99$eligtreat[ofage99$eligtreat==1 & ofage99$country==x])/(length(ofage99$eligtreat[ofage99$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)


itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
ofage99 <- right_join(ofage99, p_itblock.data)
ofage99$ipw <- (1/ofage99$p_itblock)*ofage99$eligtreat +
                    (1/(1-ofage99$p_itblock))*(1-ofage99$eligtreat)

colvector <- c("country", "ipw","eligtreat", "q1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID99")
na_ofage99_lm <- ofage99 %>% select_(.dots = colvector)
na_ofage99_lm <- na.omit(na_ofage99_lm)
names(na_ofage99_lm) <- colvector



## CACE 1999

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1], "ID99")
cace_ofage99.data <- ofage99 %>% select_(.dots = col_vector)
cace_ofage99.data$q11[cace_ofage99.data$eligtreat==0] <- 0


cace_ofage99.data <- na.omit(cace_ofage99.data)
names(cace_ofage99.data) <- col_vector
cace_ofage99.data = left_join(cace_ofage99.data, na_ofage99_lm)



## randomization inference 1999
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = cace_ofage99.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility 1999

int_lm_epsoc99 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=cace_ofage99.data, weights = ipw)
BM_epsoc99_int <- BMlmSE(int_lm_epsoc99, clustervar = as.factor(cace_ofage99.data$country))


# randomization inference
tmplm <- lm(q1 ~  highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=cace_ofage99.data, weights = ipw)

cace_ofage99.data  = cace_ofage99.data  %>% mutate(resid_int = resid(tmplm))
est_itt <- coef(lm(resid_int ~ eligtreat, data = cace_ofage99.data))[["eligtreat"]]
rand_dist <- replicate(10^3, permute_block_sharp_null(z = cace_ofage99.data$eligtreat, y = cace_ofage99.data$resid_int))

## p-value
p_two_sided_ofage99 <- 2* min(mean(rand_dist >= est_itt), mean(rand_dist <= est_itt))




# IV 1999

fit_ofage99.2sls_cov <- ivreg(q1 ~ q11 +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp +
                       schoolengage +
                       as.factor(country)
                    | eligtreat  +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  + q48 + votingp + interestp +
                      schoolengage +
                       as.factor(country),  data=cace_ofage99.data, weights = ipw)
est_fit_ofage99.2sls_cov <- commarobust_tidy(fit_ofage99.2sls_cov, clust_var = cace_ofage99.data$country)


ofage99_randominfp <- c(round(p_two_sided_ofage99, digits=3), NULL)







## ----pol-int-placebo, echo=FALSE, warning=FALSE, message=FALSE, results='asis', include=FALSE----

set.seed(64558030)

#### random events ####
#### 2000

#2000
p_ofage00 <- euyou.data%>% filter(eligdate > as.Date("1999-09-01", format = '%Y-%m-%d') &
                              eligdate < as.Date("2001-03-31", format = '%Y-%m-%d'))
p_ofage00 <- p_ofage00 %>% filter(eligdate != as.Date("2000-06-15", format = '%Y-%m-%d'))
p_ofage00$eligtreat <- if_else(p_ofage00$eligdate < as.Date("2000-06-15", format= '%Y-%m-%d'), 1, 0)
p_ofage00$q11[p_ofage00$eligtreat==0] <- 0


# generate identifier variable
p_ofage00 = p_ofage00 %>%
 mutate(ID00 = seq(1:nrow(p_ofage00)))


colvector <- c("country", "eligtreat", "q1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID00")
pna_ofage00_lm <- p_ofage00 %>% select_(.dots = colvector)
pna_ofage00_lm <- na.omit(pna_ofage00_lm)
names(pna_ofage00_lm) <- colvector

blocks <- unique(pna_ofage00_lm$country[!is.na(pna_ofage00_lm$country)])
p_itblock <- sapply(blocks, function(x) {
    length(pna_ofage00_lm$eligtreat[pna_ofage00_lm$eligtreat==1 & pna_ofage00_lm$country==x])/(length(pna_ofage00_lm$eligtreat[pna_ofage00_lm$country==x]))
  })
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage00_lm <- right_join(pna_ofage00_lm, p_itblock.data)
pna_ofage00_lm$ipw <- (1/pna_ofage00_lm$p_itblock)*pna_ofage00_lm$eligtreat +
                    (1/(1-pna_ofage00_lm$p_itblock))*(1-pna_ofage00_lm$eligtreat)



## CACE 2000

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1], "ID00")
cace_p_ofage00.data <- p_ofage00 %>% select_(.dots = col_vector)
cace_p_ofage00.data$q11[cace_p_ofage00.data$eligtreat==0] <- 0

cace_p_ofage00.data <- na.omit(cace_p_ofage00.data)
names(cace_p_ofage00.data) <- col_vector
cace_p_ofage00.data = left_join(cace_p_ofage00.data, pna_ofage00_lm)



lm_p_ofage00 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=cace_p_ofage00.data, weights = ipw)
BM_ofage00 <- BMlmSE(lm_p_ofage00, clustervar = as.factor(cace_p_ofage00.data$country))



fit_p_ofage00.2sls <- ivreg(q1 ~ q11 + as.factor(country)
                    | eligtreat + as.factor(country), data=cace_p_ofage00.data, , weights = ipw)
est_fit_p_ofage00.2sls <- commarobust_tidy(fit_p_ofage00.2sls, clust_var = cace_p_ofage00.data$country)


fit_p_ofage00.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage00.data, weights = ipw)
est_fit_p_ofage00.2sls_cov <- commarobust_tidy(fit_p_ofage00.2sls_cov, clust_var = cace_p_ofage00.data$country)






#### 2001
#2001
p_ofage01 <- euyou.data%>% filter(eligdate > as.Date("2000-09-01", format = '%Y-%m-%d') &
                                eligdate < as.Date("2002-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage01 <- p_ofage01 %>% filter(eligdate != as.Date("2001-06-15", format = '%Y-%m-%d'))
p_ofage01$eligtreat <- if_else(p_ofage01$eligdate < as.Date("2001-06-15", format= '%Y-%m-%d'), 1, 0)
p_ofage01$q11[p_ofage01$eligtreat==0] <- 0


# generate identifier variable
p_ofage01 = p_ofage01 %>%
 mutate(ID01 = seq(1:nrow(p_ofage01)))


colvector <- c("country", "eligtreat", "q1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID01")
pna_ofage01_lm <- p_ofage01 %>% select_(.dots = colvector)
pna_ofage01_lm <- na.omit(pna_ofage01_lm)
names(pna_ofage01_lm) <- colvector

blocks <- unique(pna_ofage01_lm$country[!is.na(pna_ofage01_lm$country)])
p_itblock <- sapply(blocks, function(x) {
    length(pna_ofage01_lm$eligtreat[pna_ofage01_lm$eligtreat==1 & pna_ofage01_lm$country==x])/(length(pna_ofage01_lm$eligtreat[pna_ofage01_lm$country==x]))
  })
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage01_lm <- right_join(pna_ofage01_lm, p_itblock.data)
pna_ofage01_lm$ipw <- (1/pna_ofage01_lm$p_itblock)*pna_ofage01_lm$eligtreat +
                    (1/(1-pna_ofage01_lm$p_itblock))*(1-pna_ofage01_lm$eligtreat)



## CACE 2001

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1], "ID01")
cace_p_ofage01.data <- p_ofage01 %>% select_(.dots = col_vector)
cace_p_ofage01.data$q11[cace_p_ofage01.data$eligtreat==0] <- 0

cace_p_ofage01.data <- na.omit(cace_p_ofage01.data)
names(cace_p_ofage01.data) <- col_vector
cace_p_ofage01.data = left_join(cace_p_ofage01.data, pna_ofage01_lm)



lm_p_ofage01 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male + q48 + votingp + interestp + schoolengage + as.factor(country), data=cace_p_ofage01.data, weights = ipw)
BM_ofage01 <- BMlmSE(lm_p_ofage01, clustervar = as.factor(cace_p_ofage01.data$country))



fit_p_ofage01.2sls <- ivreg(q1 ~ q11 + as.factor(country)
                    | eligtreat + as.factor(country), data=cace_p_ofage01.data, weights = ipw)
est_fit_p_ofage01.2sls <- commarobust_tidy(fit_p_ofage01.2sls, clust_var = cace_p_ofage01.data$country)


fit_p_ofage01.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage01.data, weights = ipw)
est_fit_p_ofage01.2sls_cov <- commarobust_tidy(fit_p_ofage01.2sls_cov, clust_var = cace_p_ofage01.data$country)







#### 2002
p_ofage02 <- euyou.data%>% filter(eligdate > as.Date("2001-09-01", format = '%Y-%m-%d') &
                                eligdate < as.Date("2003-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage02 <- p_ofage02 %>% filter(eligdate != as.Date("2002-06-15", format = '%Y-%m-%d'))
p_ofage02$eligtreat <- if_else(p_ofage02$eligdate < as.Date("2002-06-15", format= '%Y-%m-%d'), 1, 0)

p_ofage02$q11[p_ofage02$eligtreat==0] <- 0



# generate identifier variable
p_ofage02 = p_ofage02 %>%
 mutate(ID02 = seq(1:nrow(p_ofage02)))


colvector <- c("country", "eligtreat", "q1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID02")
pna_ofage02_lm <- p_ofage02 %>% select_(.dots = colvector)
pna_ofage02_lm <- na.omit(pna_ofage02_lm)
names(pna_ofage02_lm) <- colvector

blocks <- unique(pna_ofage02_lm$country[!is.na(pna_ofage02_lm$country)])
p_itblock <- sapply(blocks, function(x) {
    length(pna_ofage02_lm$eligtreat[pna_ofage02_lm$eligtreat==1 & pna_ofage02_lm$country==x])/(length(pna_ofage02_lm$eligtreat[pna_ofage02_lm$country==x]))
  })
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage02_lm <- right_join(pna_ofage02_lm, p_itblock.data)
pna_ofage02_lm$ipw <- (1/pna_ofage02_lm$p_itblock)*pna_ofage02_lm$eligtreat +
                    (1/(1-pna_ofage02_lm$p_itblock))*(1-pna_ofage02_lm$eligtreat)



## CACE 2002

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1], "ID02")
cace_p_ofage02.data <- p_ofage02 %>% select_(.dots = col_vector)
cace_p_ofage02.data$q11[cace_p_ofage02.data$eligtreat==0] <- 0

cace_p_ofage02.data <- na.omit(cace_p_ofage02.data)
names(cace_p_ofage02.data) <- col_vector
cace_p_ofage02.data = left_join(cace_p_ofage02.data, pna_ofage02_lm)



lm_p_ofage02 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=cace_p_ofage02.data, weights = ipw)
BM_ofage02 <- BMlmSE(lm_p_ofage02, clustervar = as.factor(cace_p_ofage02.data$country))



fit_p_ofage02.2sls <- ivreg(q1 ~ q11 + as.factor(country)
                    | eligtreat + as.factor(country), data=cace_p_ofage02.data, weights = ipw)
est_fit_p_ofage02.2sls <- commarobust_tidy(fit_p_ofage02.2sls, clust_var = cace_p_ofage02.data$country)


fit_p_ofage02.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage02.data, weights = ipw)
est_fit_p_ofage02.2sls_cov <- commarobust_tidy(fit_p_ofage02.2sls_cov, clust_var = cace_p_ofage02.data$country)





#### 2003
p_ofage03 <- euyou.data%>% filter(eligdate > as.Date("2002-09-01", format = '%Y-%m-%d') &
                                eligdate < as.Date("2004-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage03 <- p_ofage03 %>% filter(eligdate != as.Date("2003-06-15", format = '%Y-%m-%d'))
p_ofage03$eligtreat <- if_else(p_ofage03$eligdate < as.Date("2003-06-15", format= '%Y-%m-%d'), 1, 0)
p_ofage03$q11[p_ofage03$eligtreat==0] <- 0



# generate identifier variable
p_ofage03 = p_ofage03 %>%
 mutate(ID03 = seq(1:nrow(p_ofage03)))


colvector <- c("country", "eligtreat", "q1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID03")
pna_ofage03_lm <- p_ofage03 %>% select_(.dots = colvector)
pna_ofage03_lm <- na.omit(pna_ofage03_lm)
names(pna_ofage03_lm) <- colvector

blocks <- unique(pna_ofage03_lm$country[!is.na(pna_ofage03_lm$country)])
p_itblock <- sapply(blocks, function(x) {
    length(pna_ofage03_lm$eligtreat[pna_ofage03_lm$eligtreat==1 & pna_ofage03_lm$country==x])/(length(pna_ofage03_lm$eligtreat[pna_ofage03_lm$country==x]))
  })
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage03_lm <- right_join(pna_ofage03_lm, p_itblock.data)
pna_ofage03_lm$ipw <- (1/pna_ofage03_lm$p_itblock)*pna_ofage03_lm$eligtreat +
                    (1/(1-pna_ofage03_lm$p_itblock))*(1-pna_ofage03_lm$eligtreat)



## CACE 2003

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1], "ID03")
cace_p_ofage03.data <- p_ofage03 %>% select_(.dots = col_vector)
cace_p_ofage03.data$q11[cace_p_ofage03.data$eligtreat==0] <- 0

cace_p_ofage03.data <- na.omit(cace_p_ofage03.data)
names(cace_p_ofage03.data) <- col_vector
cace_p_ofage03.data = left_join(cace_p_ofage03.data, pna_ofage03_lm)



lm_p_ofage03 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=cace_p_ofage03.data, weights = ipw)
BM_ofage03 <- BMlmSE(lm_p_ofage03, clustervar = as.factor(cace_p_ofage03.data$country))



fit_p_ofage03.2sls <- ivreg(q1 ~ q11 + as.factor(country)
                    | eligtreat + as.factor(country), data=cace_p_ofage03.data, weights = ipw)
est_fit_p_ofage03.2sls <- commarobust_tidy(fit_p_ofage03.2sls, clust_var = cace_p_ofage03.data$country)


fit_p_ofage03.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage03.data, weights = ipw)
est_fit_p_ofage03.2sls_cov <- commarobust_tidy(fit_p_ofage03.2sls_cov, clust_var = cace_p_ofage03.data$country)



#### 2005
p_ofage05 <- euyou.data%>% filter(eligdate > as.Date("2004-09-01", format = '%Y-%m-%d') &
                                eligdate < as.Date("2006-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage05 <- p_ofage05 %>% filter(eligdate != as.Date("2005-06-15", format = '%Y-%m-%d'))
p_ofage05$eligtreat <- if_else(p_ofage05$eligdate < as.Date("2005-06-15", format= '%Y-%m-%d'), 1, 0)

# create a variable measuring the future "compliance" among those who are future eligibles
p_ofage05$p_q11 <- NULL
p_ofage05$p_q11[p_ofage05$eligtreat==0] <- 0
p_ofage05$p_q11[p_ofage05$eligtreat==1] <- rbinom(length(p_ofage05$eligtreat==1), 1, 0.485)






# generate identifier variable
p_ofage05 = p_ofage05 %>%
 mutate(ID05 = seq(1:nrow(p_ofage05)))


colvector <- c("country", "eligtreat", "q1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "p_q11", "ID05")
pna_ofage05_lm <- p_ofage05 %>% select_(.dots = colvector)
pna_ofage05_lm <- na.omit(pna_ofage05_lm)
names(pna_ofage05_lm) <- colvector

blocks <- unique(pna_ofage05_lm$country[!is.na(pna_ofage05_lm$country)])
p_itblock <- sapply(blocks, function(x) {
    length(pna_ofage05_lm$eligtreat[pna_ofage05_lm$eligtreat==1 & pna_ofage05_lm$country==x])/(length(pna_ofage05_lm$eligtreat[pna_ofage05_lm$country==x]))
  })
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage05_lm <- right_join(pna_ofage05_lm, p_itblock.data)
pna_ofage05_lm$ipw <- (1/pna_ofage05_lm$p_itblock)*pna_ofage05_lm$eligtreat +
                    (1/(1-pna_ofage05_lm$p_itblock))*(1-pna_ofage05_lm$eligtreat)



## CACE 2005

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "p_q11", covs[,1], "ID05")
cace_p_ofage05.data <- p_ofage05 %>% select_(.dots = col_vector)


cace_p_ofage05.data$p_q11[cace_p_ofage05.data$eligtreat==0] <- 0
cace_p_ofage05.data <- na.omit(cace_p_ofage05.data)
names(cace_p_ofage05.data) <- col_vector

cace_p_ofage05.data = left_join(cace_p_ofage05.data, pna_ofage05_lm)



lm_p_ofage05 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male + q48 + votingp + interestp + schoolengage + as.factor(country) , data=cace_p_ofage05.data)
BM_ofage05 <- BMlmSE(lm_p_ofage05, clustervar = as.factor(cace_p_ofage05.data$country))



fit_p_ofage05.2sls <- ivreg(q1 ~ p_q11 + as.factor(country)
                    | eligtreat + as.factor(country), data=cace_p_ofage05.data, weights = ipw)
est_fit_p_ofage05.2sls <- commarobust_tidy(fit_p_ofage05.2sls, clust_var = cace_p_ofage05.data$country)


fit_p_ofage05.2sls_cov <- ivreg(q1 ~ p_q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage05.data, weights = ipw)
est_fit_p_ofage05.2sls_cov <- commarobust_tidy(fit_p_ofage05.2sls_cov, clust_var = cace_p_ofage05.data$country)




#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE 3: Effect of eligibility and voting in placebo EP election years on political interest ###
#<!-- ------------------------------------------------------------------------------------------ -->


stargazer(lm_p_ofage00,
          lm_p_ofage01,
          lm_p_ofage02,
          lm_p_ofage03,
          lm_p_ofage05,
          se=list(BM_ofage00$adj.se,
                  BM_ofage01$adj.se,
                  BM_ofage02$adj.se,
                  BM_ofage03$adj.se ,
                  BM_ofage05$adj.se
                  ),
          omit = c("Constant" ,"country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          dep.var.caption=c("Dependent variable: political interest"),
          covariate.labels = c("Eligible"),
          dep.var.labels   = c(""),
          title=c("Effect of eligibility and voting in placebo EP years on political interest"),
          header=FALSE, table.placement = "tb", notes=NULL,
          font.size="small",
          single.row = TRUE,
          digits=2, 
          type = "text", out="./table3_1.txt"
          )


stargazer(fit_p_ofage00.2sls_cov,
          fit_p_ofage01.2sls_cov,
          fit_p_ofage02.2sls_cov,
          fit_p_ofage03.2sls_cov,
          fit_p_ofage05.2sls_cov,
          se = list(est_fit_p_ofage00.2sls_cov[,3],
                    est_fit_p_ofage01.2sls_cov[,3],
                    est_fit_p_ofage02.2sls_cov[,3],
                    est_fit_p_ofage03.2sls_cov[,3],
                    est_fit_p_ofage05.2sls_cov[,3]),
          p = list(est_fit_p_ofage00.2sls_cov[,5],
                    est_fit_p_ofage01.2sls_cov[,5],
                    est_fit_p_ofage02.2sls_cov[,5],
                    est_fit_p_ofage03.2sls_cov[,5],
                    est_fit_p_ofage05.2sls_cov[,5]),
          omit.stat=c("f", "ser", "adj.rsq", "rsq"),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          dep.var.caption=c("Dependent variable: political interest"),
          dep.var.labels   = c(""),
          covariate.labels = c("Voting"),
          title=c("Effect of eligibility and voting in placebo EP years on political interest"),
          header=FALSE, table.placement = "tb", notes=NULL,
          label = c("tab:placivint"),
          font.size="small",
          single.row = TRUE,
          digits=2, 
          type = "text", out="./table3_2.txt")
          






## ----tiny-long-table, warning=FALSE, message=FALSE, results="asis"-------

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE 4: Effect of first-time EP eligibility and voting on interest in politics (model 1 and 2
#             tiny bandwidth, model 3 and 4 long-term effects)                                 ###
#<!-- ------------------------------------------------------------------------------------------ -->


stargazer(int_lm_epsoc_tiny4,
          fit_tiny4bd.2sls_cov,
          int_lm_epsoc99,
          fit_ofage99.2sls_cov,
          se=list(BM_epsoc_tiny4_int$adj.se,
                  est_fit_tiny4bd.2sls_cov[,3],
                  BM_epsoc99_int$adj.se,
                  est_fit_ofage99.2sls_cov[,3]),
          p = list(NULL,
                   est_fit_tiny4bd.2sls_cov[,5],
                  NULL,
                  est_fit_ofage99.2sls_cov[,5]),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          covariate.labels = c("Eligible", "Voting"),
          dep.var.caption=c("Dependent variable: political interest"),
          dep.var.labels.include = FALSE,
          add.lines = list(c("Random. Inf. p",
                             round(p_two_sided_epsoc_tiny4, digits=2), "",
                             round(p_two_sided_ofage99, digits=2), ""),
                           c("Age",
                             "[17.67-18.33]", "[17.67-18.33]",
                             "[22.25-23.75]", "[22.25-23.75]"),
                           c("Method", "OLS", "IV", "OLS", "IV"),
                           c("Controls", "yes", "yes", "yes", "yes")),
          #, "Education", "Household with Parents", "Standard of Living", "Religious", "Higher Education Parents", "Gender", "Urban-Rural", "Voting Habits Parents", "Political Interest Parents", "Civic Engagement in School"),
          title=c("Effect of first-time EP eligibility and voting on interest in politics (model 1 and 2 tiny bandwidth, models 3 and 4 long-term effects)"),
          order = c(1, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
          header=FALSE, table.placement = "tb", notes.append = FALSE,
          model.names = FALSE,
          notes = c("$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. Constant and country fixed-effects omitted from output. Model 2 and 4 show the causal average complier effect (CACE) from using the eligibility as instrument. Bell-McCaffrey bias adjusted robust SE in parentheses. P-values of two-tailed tests based on randomisation inference (permutation within countries). Inverse probability weights accounting for different probabilities of assignment to treatment and control conditions between country blocks."),
          notes.label = "",
          label = c("tab:robust"),
          font.size="small",
          single.row = TRUE,
          digits=2,
          column.sep.width = "0pt", 
          type = "text", out="./table4.txt"  
                )




## ----refmgr references, results="asis", echo=FALSE-----------------------
# PrintBibliography(bib)


## ----pol-int-eur-data, message=FALSE, warning=FALSE, echo=FALSE----------



## pol interst in european politics

set.seed(64558030)

### outcome political interest ###
col_vector <- c("country", "eligtreat", "q2_3", covs[,1], "ID")
na_epsoc_q2_3.data <- epsoc.data %>% select_(.dots = col_vector)
na_epsoc_q2_3.data <- na.omit(na_epsoc_q2_3.data)
names(na_epsoc_q2_3.data) <- col_vector

### IPW na_epsoc_q2_3.data ###
blocks <- unique(na_epsoc_q2_3.data$country[!is.na(na_epsoc_q2_3.data$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
    length(na_epsoc_q2_3.data$eligtreat[na_epsoc_q2_3.data$eligtreat==1 & na_epsoc_q2_3.data$country==x])/(length(na_epsoc_q2_3.data$eligtreat[na_epsoc_q2_3.data$country==x]))
  }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment Elig 2004", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc_q2_3.data <- right_join(na_epsoc_q2_3.data, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_q2_3.data$ipw <- (1/na_epsoc_q2_3.data$p_itblock)*na_epsoc_q2_3.data$eligtreat +
                    (1/(1-na_epsoc_q2_3.data$p_itblock))*(1-na_epsoc_q2_3.data$eligtreat)





## ----descriptives-avgint, echo=FALSE, warning=FALSE, message=FALSE,  results='asis'----

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE A.1: Treatment condition and political interest across countries  ###
#<!-- ------------------------------------------------------------------------------------------ -->


na_epsoc_q1.data %<>% mutate(Country= case_when(
  country==2 ~ "Estonia",
  country==3 ~ "Finland",
  country==4 ~ "France",
  country==5 ~ "Germany",
  country==6 ~ "Italy",
  country==8 ~ "United Kingdom"
 ))

kable(intavg_countries.df <- na_epsoc_q1.data %>% group_by(Country) %>%
        summarise(N = n(),
                  treated = sum(eligtreat),
                  meanint = mean(q1),
                  sdint = sd(q1)),    ,
                  digits=2,
      col.names = c("Country", "N", "Eligible",
                    "Mean Pol. Interest", "SD Pol. Interest"),
      caption = "Descriptive statistics of political interest across countries",
      booktabs = T  ) %>%
  row_spec(nrow(intavg_countries.df), hline_after=T) %>%
  kable_styling(latex_options= c("HOLD_position"))



## ----descriptive-table-appendix, results="asis", eval=FALSE--------------

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE A.3: Summary Statistics ###
#<!-- ------------------------------------------------------------------------------------------ -->


# join the dataframes to include the various different subsets

descriptives.data <-
  full_join((cace_epsoc9_ppr.data %>% dplyr::select(c("prop_ppr", "ID")) ), na_epsoc9_q1.data, by=c("ID"))

descriptives.data <-
  full_join((cace_epsoc9_q1.data %>% dplyr::select(c("q11", "ID")) ), descriptives.data, by=c("ID"))

descriptives.data <-
  full_join((cace_epsoc9_gre.data %>% dplyr::select(c("prop_gre", "ID")) ), descriptives.data, by=c("ID"))

descriptives.data <-
  full_join((cace_epsoc9_rle.data %>% dplyr::select(c("prop_rle", "ID")) ), descriptives.data, by=c("ID"))

descriptives.data <-
  full_join((cace_epsoc9_antieu.data %>% dplyr::select(c("prop_antieu", "ID")) ), descriptives.data, by=c("ID"))

descriptives.data <-
  full_join((na_epsoc_q2_3.data %>% dplyr::select(c("q2_3", "ID"))), descriptives.data, by=c("ID"))


# Descriptive table appendix


#create country-dummies
for(level in unique(descriptives.data$country)){
  descriptives.data[paste("country", level, sep = "")] <- ifelse(descriptives.data$country == level, 1, 0)
}


descriptives.data <- descriptives.data[c("eligtreat", "q1", "q2_3", "q11", "prop_rle", "prop_gre", "prop_ppr", "prop_antieu", "male", "q48", "q45", "q47", "HIDIPLO", "Q44B", "highedu", "votingp", "interestp", "schoolengage", "country2", "country3", "country4", "country5", "country6", "country8")]


descriptives.data <- as.data.frame(descriptives.data)

stargazer(descriptives.data,
          summary = TRUE,
          covariate.labels = c("Eligibile", "Political Interest", "European Political Interest",
                               "Voted in EP", "Closeness Radical Left", "Closeness Green Parties",
                               "Closeness Populist Right", "Closeness Anti-EU Parties",
                               "Gender",  "Urban-Rural",  "Standard of Living",
                               "Relgiousness", "Higher Education Parents",  
                               "Household with Parents",  "Education",
                               "Voting Habits Parents", "Political Interest Parents",
                               "Civic Engagement in School",
                               "Estonia", "Finland", "France", "Germany", "Italy", "United Kingdom"),
          title=c("Summary statistics"),
          header=FALSE, table.placement = "tb",
          label = c("tab:app_sumstats"),
          font.size="small",
          digits=2,
          column.sep.width = "3pt", 
          type = "text", out="./tableA3.txt" 
                )




## ----pol-int-eur, message=FALSE, warning=FALSE, echo=FALSE, results="asis"----


set.seed(64558030)


## pol interst in european politics

eurint_uni_epsoc <- lm(q2_3 ~ eligtreat + as.factor(country), data=na_epsoc_q2_3.data, weights = ipw)
BM_epsoc_eurint_uni <- BMlmSE(eurint_uni_epsoc, clustervar = as.factor(na_epsoc_q2_3.data$country))



eurint_lm_epsoc <- lm(q2_3 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=na_epsoc_q2_3.data, weights = ipw, clusters = country)
BM_epsoc_eurint <- BMlmSE(eurint_lm_epsoc, clustervar = as.factor(na_epsoc_q2_3.data$country))




permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_q2_3.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
#bivariate + countryFE
tmplm <- lm(q2_3 ~ eligtreat, data=na_epsoc_q2_3.data, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_q2_3.data$eligtreat, y = na_epsoc_q2_3.data$q2_3))

## p-value blocks
p_two_sided_q2_epsoc_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariates
tmplm <- lm(q2_3 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_q2_3.data, weights = ipw, clusters = country)
est_itt <- coef(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_q2_3.data$eligtreat, y = na_epsoc_q2_3.data$q2_3))


## p-value blocks
p_two_sided_q2_epsoc <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


### cace outcome european political interest ###
col_vector <- c("country", "eligtreat", "q2_3", "q11", covs[,1], "ID")
cace_epsoc9_q2_3.data <- epsoc9.data %>% select_(.dots = col_vector)
cace_epsoc9_q2_3.data$q11[cace_epsoc9_q2_3.data$eligtreat==0] <- 0


cace_epsoc9_q2_3.data <- na.omit(cace_epsoc9_q2_3.data)
names(cace_epsoc9_q2_3.data) <- col_vector
# add the ipw variable
cace_epsoc9_q2_3.data = left_join(cace_epsoc9_q2_3.data, na_epsoc_q2_3.data)



fit_q2.2sls <- ivreg(q2_3 ~ q11 + as.factor(country)
                  | eligtreat + as.factor(country), data=cace_epsoc9_q2_3.data, weights = ipw)
est_fit_q2.2sls <- commarobust_tidy(fit_q2.2sls, clust_var = cace_epsoc9_q2_3.data$country)


fit_q2.2sls_cov <- ivreg(q2_3 ~ q11 + highedu + Q44B + q45 + q47 + HIDIPLO +
                        male  +q48 + votingp + interestp + schoolengage + as.factor(country)
                    |  eligtreat + highedu + Q44B + q45 + q47 + HIDIPLO +
                        male  +q48 + votingp + interestp + schoolengage + as.factor(country),  data=cace_epsoc9_q2_3.data,  clusters=country, weights = ipw)
est_fit_q2.2sls_cov <- commarobust_tidy(fit_q2.2sls_cov, clust_var = cace_epsoc9_q2_3.data$country)


#same results for first-stage CACE sample? YES

firststage_eurint_uni_epsoc <- lm(q2_3 ~ eligtreat + as.factor(country), data=cace_epsoc9_q2_3.data, weights = ipw)



firststage_eurint_lm_epsoc <- lm(q2_3 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=cace_epsoc9_q2_3.data, weights = ipw, clusters = country)





#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE B.5: Effect of first-time EP eligibility and voting on European political interest   ###
#<!-- ------------------------------------------------------------------------------------------ -->


poleurint_randominfp <- c(round(p_two_sided_q2_epsoc_uni, digits=3), round(p_two_sided_q2_epsoc, digits=3), NULL, NULL)

stargazer(eurint_uni_epsoc,
          eurint_lm_epsoc,
          fit_q2.2sls,
          fit_q2.2sls_cov,
          se=list(BM_epsoc_eurint_uni$adj.se,
                  BM_epsoc_eurint$adj.se,
                  est_fit_q2.2sls[,3],
                  est_fit_q2.2sls_cov[,3]),
          p = list(NULL,
                  NULL,
                  est_fit_q2.2sls[,5],
                  est_fit_q2.2sls_cov[,5]),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          dep.var.caption=c("Dependent variable: European political interest"),
          dep.var.labels.include = FALSE,
          covariate.labels = c("Eligible", "Voting"),
          #, "Education", "Household with Parents", "Standard of Living", "Religious",
          # "Higher Education Parents", "Gender", "Urban-Rural", "Voting Habits Parents",
          # "Political Interest Parents", "Civic Engagement in School"),
          add.lines = list(c("Random. Inf. (p-value)", poleurint_randominfp),
                           c("Age", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]",
                            "[17.25-18.75]"),
                           c("Method", "OLS", "OLS", "IV", "IV"),
                           c("Controls", "no", "yes", "no", "yes")),
          title=c("Effect of first-time EP eligibility and voting on European interest in politics"),
          #order = c(1, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
          header=FALSE, table.placement = "tb",
          notes.append = FALSE,
          model.names = FALSE,
          notes = c("$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. Constant and country fixed-effects omitted from output. Model 3 and 4 show the causal average complier effect (CACE) from using the eligibility as instrument. Bell-McCaffrey bias adjusted robust SE in parentheses. P-values of two-tailed tests based on randomisation inference (permutation within countries). Inverse probability weights accounting for different probabilities of assignment to treatment and control conditions between country blocks."),
          label = c("tab:poleurint"),
          notes.align = "l",
          notes.label = "",
          font.size="small",
          single.row = TRUE,
          digits=2,
          #align=TRUE,
          column.sep.width = "3pt",
          type="text", out="./tableB5.txt"
                )




## ----birthmonths, warning=FALSE, fig.retina=5, out.width="70%", fig.height=3, fig.cap="Mean deviation of actual number of respondents coming of age from expected value", fig.pos="htbp", fig.align="center"----


yearly_ofage.data <- euyou.data %>%  
  filter(q34b >= 1982) %>%
  group_by(Birthyear = q34b) %>%
  dplyr::summarise(N_y_born = n())

EP_Year.list <- c(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE)
yearly_ofage.data <- cbind(yearly_ofage.data, EP_Year.list)

ymonthly_ofage.data <- euyou.data %>%  
  filter(q34b >= 1982) %>%
  group_by(Birthyear = q34b, Birthmonth= q34a) %>%
  dplyr::summarise(N_born_actual = n())

  #names(yearly_ofage.data) <- c("Birthyear", "Birthmonth", "Total")
  #kable(ymonthly_ofage.data, caption = "Number of respondents per year and month")

ymonthly_ofage.data <- full_join(yearly_ofage.data, ymonthly_ofage.data)  
### add expected distribution for years, leap years as a column to dataframe
probborn <- c(31/365, 28/365, 31/365, 30/365, 31/365, 30/365, 31/365, 31/365, 30/365, 31/365, 30/365, 31/365)
probborn_leap <- c(31/366, 29/366, 31/366, 30/366, 31/366, 30/366, 31/366, 31/366, 30/366, 31/366, 30/366, 31/366)
#last year only four months
probborn06 <- c(31/365, 28/365, 31/365, 30/365)


prob.data <- c(probborn, probborn, probborn, probborn, probborn_leap, probborn, probborn06)



##
ymonthly_ofage.data <- ymonthly_ofage.data %>%
  mutate(N_born_expected = ymonthly_ofage.data$N_y_born*prob.data)


ymonthly_ofage.data$ym <- as.yearmon(paste(ymonthly_ofage.data$Birthyear,
                                           ymonthly_ofage.data$Birthmonth, sep = "-"))


names(yearly_ofage.data) <- c("Birthyear", "N Born", "EP Year")


ymonthly_ofage.data$deviation <- ymonthly_ofage.data$N_born_actual - ymonthly_ofage.data$N_born_expected

#create the respective bandwidth of 9 months around the cutoff
ymonthly_ofage.data$ym <- as.Date(ymonthly_ofage.data$ym)
ymonthly_ofage.data$EP <- ifelse(ymonthly_ofage.data$ym > as.Date("1985-09-01",
                                                                  format = '%Y-%m-%d') &
                                 ymonthly_ofage.data$ym < as.Date("1987-03-31",
                                                                  format = '%Y-%m-%d'),1,0)



avg_mbirth <- ymonthly_ofage.data %>%
  group_by(Birthmonth, EP) %>%
  dplyr::summarise(avgdev = mean(deviation),
                   stddev = sd(deviation))

avg_mbirth$EP <- factor(as.character(avg_mbirth$EP),
                                   labels = c("Other", "EP"))

p_mofage <- ggplot(data=avg_mbirth, aes(x= Birthmonth, y=avgdev, fill=EP)) +
geom_bar(stat="identity", position=position_dodge()) +
  geom_errorbar(aes(ymin=-avgdev-stddev, ymax=avgdev+stddev), width=.2,
                 position=position_dodge(.9))+
  #ggtitle("Mean deviation of actual number of respondents \ncoming of age from expected value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(panel.background = element_blank(),                                              
        panel.grid.major = element_blank(),                                              
        panel.border = element_rect(colour = "black", fill=NA, size=1)) +
  scale_fill_grey() +
  scale_x_continuous(name = "Month of Birth",
                     breaks=seq(1, 12, 1),
                     labels=c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                              "Jul", "Aug", "Sep", "Okt", "Nov", "Dec")) +
  scale_y_continuous(name="Mean deviation") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(text=element_text(size=10, family="CM Sans"))

ggsave(filename="figs/birthdates.pdf", plot=p_mofage,
       width = 10, height= 4, units= "cm", family="CM Sans")


p_mofage




## ----placebo-pol-int-coefplot, message=FALSE, fig.retina=5, out.width="70%", fig.height=3, fig.cap="Effect of first-time placebo EP eligibility and voting on interest in politics", fig.pos="htbp", fig.align="center"----


# placebo-estimates coefficient plot
# # Put model estimates into temporary data.frames:
plac00Frame <- data.frame(Variable = "CACE",
                          Coefficient = est_fit_p_ofage00.2sls_cov[2,2],
                          SE = est_fit_p_ofage00.2sls_cov[2,3],
                          modelName = "2000 Placebo")

plac00ITTFrame <- data.frame(Variable = "CACE",
                           Coefficient = coefficients(lm_p_ofage00)[2],
                           SE = BM_ofage00$adj.se[2],
                           modelName = "2000 Placebo")


plac01Frame <- data.frame( Variable = "CACE",
                          Coefficient = est_fit_p_ofage01.2sls_cov[2,2],
                          SE = est_fit_p_ofage01.2sls_cov[2,3],
                          modelName = "2001 Placebo")

plac01ITTFrame <- data.frame(Variable = "CACE",
                           Coefficient = coefficients(lm_p_ofage01)[2],
                           SE = BM_ofage01$adj.se[2],
                           modelName = "2001 Placebo")


plac02Frame <- data.frame( Variable = "CACE",
                          Coefficient =  est_fit_p_ofage02.2sls_cov[2,2],
                          SE = est_fit_p_ofage02.2sls_cov[2,3],
                          modelName = "2002 Placebo")

plac02ITTFrame <- data.frame(Variable = "CACE",
                           Coefficient = coefficients(lm_p_ofage02)[2],
                           SE = BM_ofage02$adj.se[2],
                           modelName = "2002 Placebo")



plac03Frame <- data.frame( Variable = "CACE",
                          Coefficient = est_fit_p_ofage03.2sls_cov[2,2],
                          SE = est_fit_p_ofage03.2sls_cov[2,3],
                          modelName = "2003 Placebo")

plac03ITTFrame <- data.frame(Variable = "CACE",
                           Coefficient = coefficients(lm_p_ofage03)[2],
                           SE = BM_ofage03$adj.se[2],
                           modelName = "2003 Placebo")


plac05Frame <- data.frame( Variable = "CACE",
                          Coefficient = est_fit_p_ofage05.2sls_cov[2,2],
                          SE = est_fit_p_ofage05.2sls_cov[2,3],
                          modelName = "2005 Placebo")

plac05ITTFrame <- data.frame(Variable = "CACE",
                             Coefficient = coefficients(lm_p_ofage05)[2],
                             SE = BM_ofage05$adj.se[2],
                             modelName = "2005 Placebo")


# # Combine these data.frames
allModelFrame <- data.frame(rbind(plac00Frame, plac00ITTFrame, plac01Frame, plac01ITTFrame, plac02Frame, plac02ITTFrame, plac03Frame, plac03ITTFrame, plac05Frame, plac05ITTFrame))  # etc.
names(allModelFrame)[names(allModelFrame)=="modelName"] <- "Model"
allModelFrame$Model <- as.factor(allModelFrame$Model)
names(allModelFrame)[names(allModelFrame)=="Variable"] <- "ITT / CACE"
allModelFrame$`CACE / ITT` <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

# Specify the width of confidence intervals
interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

# # Plot

 zp1 <- ggplot(allModelFrame, aes(shape=Model)
               )
 zp1 <- zp1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) #+ geom_hline(yintercept = 0.25, colour = "blue", lty = 2)
 zp1 <- zp1 + geom_linerange(aes(x = `CACE / ITT`, ymin = Coefficient - SE*interval1,
                                 ymax = Coefficient + SE*interval1),
                             lwd = 1, position = position_dodge(width = 1/2))
 zp1 <- zp1 + geom_pointrange(aes(x = `CACE / ITT`, y = Coefficient, ymin = Coefficient - SE*interval2,
                             ymax = Coefficient + SE*interval2),
                              lwd = 1/2, position = position_dodge(width = 1/2),
                             fill = "WHITE")
 zp1 <- zp1 + coord_flip()
zp1 <- zp1 + theme_minimal() + theme(axis.text.y=element_blank(), axis.ticks.y=element_blank(),
                 panel.border = element_blank(), panel.grid.major = element_blank(),
                 panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
 zp1 <- zp1 +  theme(plot.title = element_text(hjust = -0.75)) +
     theme(text=element_text(family="CM Sans"))




 ggsave(filename="./figs/coefplotplacebo.pdf", plot=zp1, family="CM Sans",
        width = 8, height= 4)



 zp1



## ----pol-int-placebo-european-interest, echo=FALSE, warning=FALSE, message=FALSE, results='asis', include=FALSE----

set.seed(64558030)

#  randomevents ####
#### 2000

#2000
p_ofage_E_00 <- euyou.data%>% filter(eligdate > as.Date("1999-09-01", format = '%Y-%m-%d') &
                                       eligdate < as.Date("2001-03-31", format = '%Y-%m-%d'))
p_ofage_E_00 <- p_ofage_E_00 %>% filter(eligdate != as.Date("2000-06-15", format = '%Y-%m-%d'))
p_ofage_E_00$eligtreat <- if_else(p_ofage_E_00$eligdate < as.Date("2000-06-15", format= '%Y-%m-%d'), 1, 0)
p_ofage_E_00$q11[p_ofage_E_00$eligtreat==0] <- 0


# generate identifier variable
p_ofage_E_00 = p_ofage_E_00 %>%
  mutate(ID00 = seq(1:nrow(p_ofage_E_00)))


colvector <- c("country", "eligtreat", "q2_3", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID00")
pna_ofage_E_00_lm <- p_ofage_E_00 %>% select_(.dots = colvector)
pna_ofage_E_00_lm <- na.omit(pna_ofage_E_00_lm)
names(pna_ofage_E_00_lm) <- colvector

blocks <- unique(pna_ofage_E_00_lm$country[!is.na(pna_ofage_E_00_lm$country)])
p_itblock <- sapply(blocks, function(x) {
  length(pna_ofage_E_00_lm$eligtreat[pna_ofage_E_00_lm$eligtreat==1 & pna_ofage_E_00_lm$country==x])/(length(pna_ofage_E_00_lm$eligtreat[pna_ofage_E_00_lm$country==x]))
})
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage_E_00_lm <- right_join(pna_ofage_E_00_lm, p_itblock.data)
pna_ofage_E_00_lm$ipw <- (1/pna_ofage_E_00_lm$p_itblock)*pna_ofage_E_00_lm$eligtreat +
  (1/(1-pna_ofage_E_00_lm$p_itblock))*(1-pna_ofage_E_00_lm$eligtreat)



## CACE 2000

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q2_3", "q11", covs[,1], "ID00")
cace_p_ofage_E_00.data <- p_ofage_E_00 %>% select_(.dots = col_vector)
cace_p_ofage_E_00.data$q11[cace_p_ofage_E_00.data$eligtreat==0] <- 0

cace_p_ofage_E_00.data <- na.omit(cace_p_ofage_E_00.data)
names(cace_p_ofage_E_00.data) <- col_vector
cace_p_ofage_E_00.data = left_join(cace_p_ofage_E_00.data, pna_ofage_E_00_lm)



lm_p_ofage_E_00 <- lm(q2_3 ~ eligtreat +   
                        highedu + Q44B + q45 + q47 + HIDIPLO +
                        male +  +q48 + votingp + interestp + schoolengage , data=cace_p_ofage_E_00.data, weights = ipw)
BM_ofage_E_00 <- BMlmSE(lm_p_ofage_E_00, clustervar = as.factor(cace_p_ofage_E_00.data$country))



fit_p_ofage_E_00.2sls <- ivreg(q2_3 ~ q11 + as.factor(country)
                               | eligtreat + as.factor(country), data=cace_p_ofage_E_00.data, weights = ipw)
est_fit_p_ofage_E_00.2sls <- commarobust_tidy(fit_p_ofage_E_00.2sls, clust_var = cace_p_ofage_E_00.data$country)


fit_p_ofage_E_00.2sls_cov <- ivreg(q2_3 ~ q11 + as.factor(country) +
                                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                                   | eligtreat + as.factor(country) +
                                     + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage_E_00.data, weights = ipw)
est_fit_p_ofage_E_00.2sls_cov <- commarobust_tidy(fit_p_ofage_E_00.2sls_cov, clust_var = cace_p_ofage_E_00.data$country)






#### 2001
#2001
p_ofage_E_01 <- euyou.data%>% filter(eligdate > as.Date("2000-09-01", format = '%Y-%m-%d') &
                                       eligdate < as.Date("2002-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage_E_01 <- p_ofage_E_01 %>% filter(eligdate != as.Date("2001-06-15", format = '%Y-%m-%d'))
p_ofage_E_01$eligtreat <- if_else(p_ofage_E_01$eligdate < as.Date("2001-06-15", format= '%Y-%m-%d'), 1, 0)
p_ofage_E_01$q11[p_ofage_E_01$eligtreat==0] <- 0


# generate identifier variable
p_ofage_E_01 = p_ofage_E_01 %>%
  mutate(ID01 = seq(1:nrow(p_ofage_E_01)))


colvector <- c("country", "eligtreat", "q2_3", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID01")
pna_ofage_E_01_lm <- p_ofage_E_01 %>% select_(.dots = colvector)
pna_ofage_E_01_lm <- na.omit(pna_ofage_E_01_lm)
names(pna_ofage_E_01_lm) <- colvector

blocks <- unique(pna_ofage_E_01_lm$country[!is.na(pna_ofage_E_01_lm$country)])
p_itblock <- sapply(blocks, function(x) {
  length(pna_ofage_E_01_lm$eligtreat[pna_ofage_E_01_lm$eligtreat==1 & pna_ofage_E_01_lm$country==x])/(length(pna_ofage_E_01_lm$eligtreat[pna_ofage_E_01_lm$country==x]))
})
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage_E_01_lm <- right_join(pna_ofage_E_01_lm, p_itblock.data)
pna_ofage_E_01_lm$ipw <- (1/pna_ofage_E_01_lm$p_itblock)*pna_ofage_E_01_lm$eligtreat +
  (1/(1-pna_ofage_E_01_lm$p_itblock))*(1-pna_ofage_E_01_lm$eligtreat)



## CACE 2001

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q2_3", "q11", covs[,1], "ID01")
cace_p_ofage_E_01.data <- p_ofage_E_01 %>% select_(.dots = col_vector)
cace_p_ofage_E_01.data$q11[cace_p_ofage_E_01.data$eligtreat==0] <- 0

cace_p_ofage_E_01.data <- na.omit(cace_p_ofage_E_01.data)
names(cace_p_ofage_E_01.data) <- col_vector
cace_p_ofage_E_01.data = left_join(cace_p_ofage_E_01.data, pna_ofage_E_01_lm)



lm_p_ofage_E_01 <- lm(q2_3 ~ eligtreat +   
                        highedu + Q44B + q45 + q47 + HIDIPLO +
                        male + q48 + votingp + interestp + schoolengage + as.factor(country), data=cace_p_ofage_E_01.data, weights = ipw)
BM_ofage_E_01 <- BMlmSE(lm_p_ofage_E_01, clustervar = as.factor(cace_p_ofage_E_01.data$country))



fit_p_ofage_E_01.2sls <- ivreg(q2_3 ~ q11 + as.factor(country)
                               | eligtreat + as.factor(country), data=cace_p_ofage_E_01.data, weights = ipw)
est_fit_p_ofage_E_01.2sls <- commarobust_tidy(fit_p_ofage_E_01.2sls, clust_var = cace_p_ofage_E_01.data$country)


fit_p_ofage_E_01.2sls_cov <- ivreg(q2_3 ~ q11 + as.factor(country) +
                                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                                   | eligtreat + as.factor(country) +
                                     + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage_E_01.data, weights = ipw)
est_fit_p_ofage_E_01.2sls_cov <- commarobust_tidy(fit_p_ofage_E_01.2sls_cov, clust_var = cace_p_ofage_E_01.data$country)







#### 2002
p_ofage_E_02 <- euyou.data%>% filter(eligdate > as.Date("2001-09-01", format = '%Y-%m-%d') &
                                       eligdate < as.Date("2003-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage_E_02 <- p_ofage_E_02 %>% filter(eligdate != as.Date("2002-06-15", format = '%Y-%m-%d'))
p_ofage_E_02$eligtreat <- if_else(p_ofage_E_02$eligdate < as.Date("2002-06-15", format= '%Y-%m-%d'), 1, 0)

p_ofage_E_02$q11[p_ofage_E_02$eligtreat==0] <- 0



# generate identifier variable
p_ofage_E_02 = p_ofage_E_02 %>%
  mutate(ID02 = seq(1:nrow(p_ofage_E_02)))


colvector <- c("country", "eligtreat", "q2_3", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID02")
pna_ofage_E_02_lm <- p_ofage_E_02 %>% select_(.dots = colvector)
pna_ofage_E_02_lm <- na.omit(pna_ofage_E_02_lm)
names(pna_ofage_E_02_lm) <- colvector

blocks <- unique(pna_ofage_E_02_lm$country[!is.na(pna_ofage_E_02_lm$country)])
p_itblock <- sapply(blocks, function(x) {
  length(pna_ofage_E_02_lm$eligtreat[pna_ofage_E_02_lm$eligtreat==1 & pna_ofage_E_02_lm$country==x])/(length(pna_ofage_E_02_lm$eligtreat[pna_ofage_E_02_lm$country==x]))
})
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage_E_02_lm <- right_join(pna_ofage_E_02_lm, p_itblock.data)
pna_ofage_E_02_lm$ipw <- (1/pna_ofage_E_02_lm$p_itblock)*pna_ofage_E_02_lm$eligtreat +
  (1/(1-pna_ofage_E_02_lm$p_itblock))*(1-pna_ofage_E_02_lm$eligtreat)



## CACE 2002

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q2_3", "q11", covs[,1], "ID02")
cace_p_ofage_E_02.data <- p_ofage_E_02 %>% select_(.dots = col_vector)
cace_p_ofage_E_02.data$q11[cace_p_ofage_E_02.data$eligtreat==0] <- 0

cace_p_ofage_E_02.data <- na.omit(cace_p_ofage_E_02.data)
names(cace_p_ofage_E_02.data) <- col_vector
cace_p_ofage_E_02.data = left_join(cace_p_ofage_E_02.data, pna_ofage_E_02_lm)



lm_p_ofage_E_02 <- lm(q2_3 ~ eligtreat +   
                        highedu + Q44B + q45 + q47 + HIDIPLO +
                        male +  +q48 + votingp + interestp + schoolengage , data=cace_p_ofage_E_02.data, weights = ipw)
BM_ofage_E_02 <- BMlmSE(lm_p_ofage_E_02, clustervar = as.factor(cace_p_ofage_E_02.data$country))


fit_p_ofage_E_02.2sls <- ivreg(q2_3 ~ q11 + as.factor(country)
                               | eligtreat + as.factor(country), data=cace_p_ofage_E_02.data, weights = ipw)
est_fit_p_ofage_E_02.2sls <- commarobust_tidy(fit_p_ofage_E_02.2sls, clust_var = cace_p_ofage_E_02.data$country)


fit_p_ofage_E_02.2sls_cov <- ivreg(q2_3 ~ q11 + as.factor(country) +
                                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                                   | eligtreat + as.factor(country) +
                                     + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage_E_02.data, weights = ipw)
est_fit_p_ofage_E_02.2sls_cov <- commarobust_tidy(fit_p_ofage_E_02.2sls_cov, clust_var = cace_p_ofage_E_02.data$country)





#### 2003
p_ofage_E_03 <- euyou.data%>% filter(eligdate > as.Date("2002-09-01", format = '%Y-%m-%d') &
                                       eligdate < as.Date("2004-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage_E_03 <- p_ofage_E_03 %>% filter(eligdate != as.Date("2003-06-15", format = '%Y-%m-%d'))
p_ofage_E_03$eligtreat <- if_else(p_ofage_E_03$eligdate < as.Date("2003-06-15", format= '%Y-%m-%d'), 1, 0)
p_ofage_E_03$q11[p_ofage_E_03$eligtreat==0] <- 0



# generate identifier variable
p_ofage_E_03 = p_ofage_E_03 %>%
  mutate(ID03 = seq(1:nrow(p_ofage_E_03)))


colvector <- c("country", "eligtreat", "q2_3", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "q11", "ID03")
pna_ofage_E_03_lm <- p_ofage_E_03 %>% select_(.dots = colvector)
pna_ofage_E_03_lm <- na.omit(pna_ofage_E_03_lm)
names(pna_ofage_E_03_lm) <- colvector

blocks <- unique(pna_ofage_E_03_lm$country[!is.na(pna_ofage_E_03_lm$country)])
p_itblock <- sapply(blocks, function(x) {
  length(pna_ofage_E_03_lm$eligtreat[pna_ofage_E_03_lm$eligtreat==1 & pna_ofage_E_03_lm$country==x])/(length(pna_ofage_E_03_lm$eligtreat[pna_ofage_E_03_lm$country==x]))
})
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage_E_03_lm <- right_join(pna_ofage_E_03_lm, p_itblock.data)
pna_ofage_E_03_lm$ipw <- (1/pna_ofage_E_03_lm$p_itblock)*pna_ofage_E_03_lm$eligtreat +
  (1/(1-pna_ofage_E_03_lm$p_itblock))*(1-pna_ofage_E_03_lm$eligtreat)



## CACE 2003

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q2_3", "q11", covs[,1], "ID03")
cace_p_ofage_E_03.data <- p_ofage_E_03 %>% select_(.dots = col_vector)
cace_p_ofage_E_03.data$q11[cace_p_ofage_E_03.data$eligtreat==0] <- 0

cace_p_ofage_E_03.data <- na.omit(cace_p_ofage_E_03.data)
names(cace_p_ofage_E_03.data) <- col_vector
cace_p_ofage_E_03.data = left_join(cace_p_ofage_E_03.data, pna_ofage_E_03_lm)



lm_p_ofage_E_03 <- lm(q2_3 ~ eligtreat +   
                        highedu + Q44B + q45 + q47 + HIDIPLO +
                        male +  +q48 + votingp + interestp + schoolengage , data=cace_p_ofage_E_03.data, weights = ipw)
BM_ofage_E_03 <- BMlmSE(lm_p_ofage_E_03, clustervar = as.factor(cace_p_ofage_E_03.data$country))


fit_p_ofage_E_03.2sls <- ivreg(q2_3 ~ q11 + as.factor(country)
                               | eligtreat + as.factor(country), data=cace_p_ofage_E_03.data, weights = ipw)
est_fit_p_ofage_E_03.2sls <- commarobust_tidy(fit_p_ofage_E_03.2sls, clust_var = cace_p_ofage_E_03.data$country)


fit_p_ofage_E_03.2sls_cov <- ivreg(q2_3 ~ q11 + as.factor(country) +
                                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                                   | eligtreat + as.factor(country) +
                                     + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage_E_03.data, weights = ipw)
est_fit_p_ofage_E_03.2sls_cov <- commarobust_tidy(fit_p_ofage_E_03.2sls_cov, clust_var = cace_p_ofage_E_03.data$country)



#### 2005
p_ofage_E_05 <- euyou.data%>% filter(eligdate > as.Date("2004-09-01", format = '%Y-%m-%d') &
                                       eligdate < as.Date("2006-03-31", format = '%Y-%m-%d'))
#exclude those born in EP month
p_ofage_E_05 <- p_ofage_E_05 %>% filter(eligdate != as.Date("2005-06-15", format = '%Y-%m-%d'))
p_ofage_E_05$eligtreat <- if_else(p_ofage_E_05$eligdate < as.Date("2005-06-15", format= '%Y-%m-%d'), 1, 0)

# create a variable measuring the future "compliance" among those who are future eligibles
p_ofage_E_05$p_q11 <- NULL
p_ofage_E_05$p_q11[p_ofage_E_05$eligtreat==0] <- 0
p_ofage_E_05$p_q11[p_ofage_E_05$eligtreat==1] <- rbinom(length(p_ofage_E_05$eligtreat==1), 1, 0.485)






# generate identifier variable
p_ofage_E_05 = p_ofage_E_05 %>%
  mutate(ID05 = seq(1:nrow(p_ofage_E_05)))


colvector <- c("country", "eligtreat", "q2_3", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "p_q11", "ID05")
pna_ofage_E_05_lm <- p_ofage_E_05 %>% select_(.dots = colvector)
pna_ofage_E_05_lm <- na.omit(pna_ofage_E_05_lm)
names(pna_ofage_E_05_lm) <- colvector

blocks <- unique(pna_ofage_E_05_lm$country[!is.na(pna_ofage_E_05_lm$country)])
p_itblock <- sapply(blocks, function(x) {
  length(pna_ofage_E_05_lm$eligtreat[pna_ofage_E_05_lm$eligtreat==1 & pna_ofage_E_05_lm$country==x])/(length(pna_ofage_E_05_lm$eligtreat[pna_ofage_E_05_lm$country==x]))
})
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
pna_ofage_E_05_lm <- right_join(pna_ofage_E_05_lm, p_itblock.data)
pna_ofage_E_05_lm$ipw <- (1/pna_ofage_E_05_lm$p_itblock)*pna_ofage_E_05_lm$eligtreat +
  (1/(1-pna_ofage_E_05_lm$p_itblock))*(1-pna_ofage_E_05_lm$eligtreat)



## CACE 2005

### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q2_3", "p_q11", covs[,1], "ID05")
cace_p_ofage_E_05.data <- p_ofage_E_05 %>% select_(.dots = col_vector)


cace_p_ofage_E_05.data$p_q11[cace_p_ofage_E_05.data$eligtreat==0] <- 0
cace_p_ofage_E_05.data <- na.omit(cace_p_ofage_E_05.data)
names(cace_p_ofage_E_05.data) <- col_vector

cace_p_ofage_E_05.data = left_join(cace_p_ofage_E_05.data, pna_ofage_E_05_lm)



lm_p_ofage_E_05 <- lm(q2_3 ~ eligtreat +   
                        highedu + Q44B + q45 + q47 + HIDIPLO +
                        male + q48 + votingp + interestp + schoolengage + as.factor(country) , data=cace_p_ofage_E_05.data, weights = ipw)
BM_ofage_E_05 <- BMlmSE(lm_p_ofage_E_05, clustervar = as.factor(cace_p_ofage_E_05.data$country))


fit_p_ofage_E_05.2sls <- ivreg(q2_3 ~ p_q11 + as.factor(country)
                               | eligtreat + as.factor(country), data=cace_p_ofage_E_05.data, weights = ipw)
est_fit_p_ofage_E_05.2sls <- commarobust_tidy(fit_p_ofage_E_05.2sls, clust_var = cace_p_ofage_E_05.data$country)


fit_p_ofage_E_05.2sls_cov <- ivreg(q2_3 ~ p_q11 + as.factor(country) +
                                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                                   | eligtreat + as.factor(country) +
                                     + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_p_ofage_E_05.data, weights = ipw)
est_fit_p_ofage_E_05.2sls_cov <- commarobust_tidy(fit_p_ofage_E_05.2sls_cov, clust_var = cace_p_ofage_E_05.data$country)



#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE B.6: Effect of eligibility and voting in placebo-EP years on European political interest   ###
#<!-- ------------------------------------------------------------------------------------------ -->


stargazer(lm_p_ofage_E_00,
          lm_p_ofage_E_01,
          lm_p_ofage_E_02,
          lm_p_ofage_E_03,
          lm_p_ofage_E_05,
          se=list(BM_ofage_E_00$adj.se,
                  BM_ofage_E_01$adj.se,
                  BM_ofage_E_02$adj.se,
                  BM_ofage_E_03$adj.se,
                  BM_ofage_E_05$adj.se
          ),
          omit = c("Constant" ,"country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          dep.var.caption=c("Dependent variable: European political interest"),
          dep.var.labels   = c("2000", "2001", "2002", "2003", "2005"),
          covariate.labels = c("Eligible (OLS)"),
          title=c("Effect of eligibility and voting in placebo-EP years on European political interest"),
          header=FALSE, table.placement = "tb", notes=NULL,
          label = c("tab:placeurint"),
          font.size="small",
          single.row = TRUE,
          digits=2,
          type = "text", out="./tableB6_1.txt"  
)


stargazer(fit_p_ofage_E_00.2sls_cov,
          fit_p_ofage_E_01.2sls_cov,
          fit_p_ofage_E_02.2sls_cov,
          fit_p_ofage_E_03.2sls_cov,
          fit_p_ofage_E_05.2sls_cov,
          se = list(est_fit_p_ofage_E_00.2sls_cov[,3],
                    est_fit_p_ofage_E_01.2sls_cov[,3],
                    est_fit_p_ofage_E_02.2sls_cov[,3],
                    est_fit_p_ofage_E_03.2sls_cov[,3],
                    est_fit_p_ofage_E_05.2sls_cov[,3]),
          p = list(est_fit_p_ofage_E_00.2sls_cov[,5],
                   est_fit_p_ofage_E_01.2sls_cov[,5],
                   est_fit_p_ofage_E_02.2sls_cov[,5],
                   est_fit_p_ofage_E_03.2sls_cov[,5],
                   est_fit_p_ofage_E_05.2sls_cov[,5]),
          omit.stat=c("f", "ser", "adj.rsq", "rsq"),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          dep.var.caption=c("Dependent variable: European political interest"),
          dep.var.labels   = c("2000", "2001", "2002", "2003", "2005"),
          covariate.labels = c("Voting (IV)"),
          title=c("Effect of eligibility and voting in placebo-EP years on European political interest"),
          header=FALSE, table.placement = "tb", notes=NULL,
          label = c("tab:placiveurint"),
          font.size="small",
          single.row = TRUE,
          digits=2,
          type = "text", out="./tableB6_2.txt")



## ----tiny-bd-5, warning=FALSE, message=FALSE, results="asis", include=FALSE----

set.seed(64558030)

int_lm_epsoc_tiny5 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=na_epsoc5_q1.data, weights = ipw)
BM_epsoc_tiny5_int <- BMlmSE(int_lm_epsoc_tiny5, clustervar = as.factor(na_epsoc5_q1.data$country))



## randomization inference
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc5_q1.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
tmplm <- lm(q1 ~ highedu + Q44B + q45 + q47 + HIDIPLO +
              male  +q48 + votingp + interestp + schoolengage  +
              as.factor(country) , data = na_epsoc5_q1.data, weights=ipw, clusters=country)
na_epsoc5_q1.data = na_epsoc5_q1.data %>% mutate(resid_int = resid(tmplm))
est_itt <- coef(lm(resid_int ~ eligtreat, data = na_epsoc5_q1.data))[["eligtreat"]]
rand_dist <- replicate(10^3, permute_block_sharp_null(z = na_epsoc5_q1.data$eligtreat, y = na_epsoc5_q1.data$resid_int))

## p-value
p_two_sided_epsoc_tiny5 <- 2* min(mean(rand_dist >= est_itt), mean(rand_dist <= est_itt))



### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1])
cace_epsoc5_q1.data <- epsoc5.data %>% select_(.dots = col_vector)
cace_epsoc5_q1.data$q11[cace_epsoc5_q1.data$eligtreat==0] <- 0

cace_epsoc5_q1.data <- na.omit(cace_epsoc5_q1.data)
names(cace_epsoc5_q1.data) <- col_vector
cace_epsoc5_q1.data = left_join(cace_epsoc5_q1.data, na_epsoc5_q1.data) ##


fit_tiny5bd.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_epsoc5_q1.data, weights=ipw)

est_fit_tiny5bd.2sls_cov <- commarobust_tidy(fit_tiny5bd.2sls_cov, clust_var = cace_epsoc5_q1.data$country)





## ----tiny-bd-6, warning=FALSE, message=FALSE, results="asis", include=FALSE----

set.seed(64558030)

int_lm_epsoc_tiny6 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=na_epsoc6_q1.data, weights = ipw)
BM_epsoc_tiny6_int <- BMlmSE(int_lm_epsoc_tiny6, clustervar = as.factor(na_epsoc6_q1.data$country))



## randomization inference
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc6_q1.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
tmplm <- lm(q1 ~ highedu + Q44B + q45 + q47 + HIDIPLO +
              male  +q48 + votingp + interestp + schoolengage  +
              as.factor(country) , data = na_epsoc6_q1.data, weights=ipw, clusters=country)
na_epsoc6_q1.data = na_epsoc6_q1.data %>% mutate(resid_int = resid(tmplm))
est_itt <- coef(lm(resid_int ~ eligtreat, data = na_epsoc6_q1.data))[["eligtreat"]]
rand_dist <- replicate(10^3, permute_block_sharp_null(z = na_epsoc6_q1.data$eligtreat, y = na_epsoc6_q1.data$resid_int))

## p-value
p_two_sided_epsoc_tiny6 <- 2* min(mean(rand_dist >= est_itt), mean(rand_dist <= est_itt))



### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1])
cace_epsoc6_q1.data <- epsoc6.data %>% select_(.dots = col_vector)
cace_epsoc6_q1.data$q11[cace_epsoc6_q1.data$eligtreat==0] <- 0

cace_epsoc6_q1.data <- na.omit(cace_epsoc6_q1.data)
names(cace_epsoc6_q1.data) <- col_vector
cace_epsoc6_q1.data = left_join(cace_epsoc6_q1.data, na_epsoc6_q1.data) ##


fit_tiny6bd.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_epsoc6_q1.data, weights=ipw)

est_fit_tiny6bd.2sls_cov <- commarobust_tidy(fit_tiny6bd.2sls_cov, clust_var = cace_epsoc6_q1.data$country)





## ----tiny-bd-7, warning=FALSE, message=FALSE, results="asis", include=FALSE----

set.seed(64558030)


int_lm_epsoc_tiny7 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=na_epsoc7_q1.data, weights = ipw)
BM_epsoc_tiny7_int <- BMlmSE(int_lm_epsoc_tiny7, clustervar = as.factor(na_epsoc7_q1.data$country))



## randomization inference
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc7_q1.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
tmplm <- lm(q1 ~ highedu + Q44B + q45 + q47 + HIDIPLO +
              male  +q48 + votingp + interestp + schoolengage  +
              as.factor(country) , data = na_epsoc7_q1.data, weights=ipw, clusters=country)
na_epsoc7_q1.data = na_epsoc7_q1.data  %>% mutate(resid_int = resid(tmplm))
est_itt <- coef(lm(resid_int ~ eligtreat, data = na_epsoc7_q1.data))[["eligtreat"]]
rand_dist <- replicate(10^3, permute_block_sharp_null(z = na_epsoc7_q1.data$eligtreat, y = na_epsoc7_q1.data$resid_int))

## p-value
p_two_sided_epsoc_tiny7 <- 2* min(mean(rand_dist >= est_itt), mean(rand_dist <= est_itt))



### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1])
cace_epsoc7_q1.data <- epsoc7.data %>% select_(.dots = col_vector)
cace_epsoc7_q1.data$q11[cace_epsoc7_q1.data$eligtreat==0] <- 0

cace_epsoc7_q1.data <- na.omit(cace_epsoc7_q1.data)
names(cace_epsoc7_q1.data) <- col_vector
cace_epsoc7_q1.data = left_join(cace_epsoc7_q1.data, na_epsoc7_q1.data) ##


fit_tiny7bd.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_epsoc7_q1.data, weights = ipw)

est_fit_tiny7bd.2sls_cov <- commarobust_tidy(fit_tiny7bd.2sls_cov, clust_var = cace_epsoc7_q1.data$country)





## ----tiny-bd-8, warning=FALSE, message=FALSE, results="asis", include=FALSE----

set.seed(64558030)


int_lm_epsoc_tiny8 <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage , data=na_epsoc8_q1.data, weights = ipw)
BM_epsoc_tiny8_int <- BMlmSE(int_lm_epsoc_tiny8, clustervar = as.factor(na_epsoc8_q1.data$country))



## randomization inference
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc8_q1.data$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
tmplm <- lm(q1 ~ highedu + Q44B + q45 + q47 + HIDIPLO +
              male  +q48 + votingp + interestp + schoolengage  +
              as.factor(country) , data = na_epsoc8_q1.data, weights=ipw, clusters=country)
na_epsoc8_q1.data = na_epsoc8_q1.data  %>% mutate(resid_int = resid(tmplm))
est_itt <- coef(lm(resid_int ~ eligtreat, data = na_epsoc8_q1.data))[["eligtreat"]]
rand_dist <- replicate(10^3, permute_block_sharp_null(z = na_epsoc8_q1.data$eligtreat, y = na_epsoc8_q1.data$resid_int))

## p-value
p_two_sided_epsoc_tiny8 <- 2* min(mean(rand_dist >= est_itt), mean(rand_dist <= est_itt))



### cace outcome political interest ###
col_vector <- c("country", "eligtreat", "q1", "q11", covs[,1])
cace_epsoc8_q1.data <- epsoc8.data %>% select_(.dots = col_vector)
cace_epsoc8_q1.data$q11[cace_epsoc8_q1.data$eligtreat==0] <- 0

cace_epsoc8_q1.data <- na.omit(cace_epsoc8_q1.data)
names(cace_epsoc8_q1.data) <- col_vector
cace_epsoc8_q1.data = left_join(cace_epsoc8_q1.data, na_epsoc8_q1.data) ##


fit_tiny8bd.2sls_cov <- ivreg(q1 ~ q11 + as.factor(country) +
                     highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage
                    | eligtreat + as.factor(country) +
                    + highedu + Q44B + q45 + q47 + HIDIPLO +
                                     male  +q48 + votingp + interestp + schoolengage,  data=cace_epsoc8_q1.data, weights = ipw)

est_fit_tiny8bd.2sls_cov <- commarobust_tidy(fit_tiny8bd.2sls_cov, clust_var = cace_epsoc8_q1.data$country)






## ----tiny-bd-all-table, warning=FALSE, message=FALSE, results="asis"-----

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE B.7: Effects across bandwidths     ###
#<!-- ------------------------------------------------------------------------------------------ -->


stargazer(int_lm_epsoc_tiny8,
          fit_tiny8bd.2sls_cov,
          int_lm_epsoc_tiny7,
          fit_tiny7bd.2sls_cov,
          int_lm_epsoc_tiny6,
          fit_tiny6bd.2sls_cov,
          int_lm_epsoc_tiny5,
          fit_tiny5bd.2sls_cov,
          int_lm_epsoc_tiny4,
          fit_tiny4bd.2sls_cov,
            se=list(BM_epsoc_tiny8_int$adj.se,
                    est_fit_tiny8bd.2sls_cov[,3],
                    BM_epsoc_tiny7_int$adj.se,
                    est_fit_tiny7bd.2sls_cov[,3],
                    BM_epsoc_tiny6_int$adj.se,
                    est_fit_tiny6bd.2sls_cov[,3],
                    BM_epsoc_tiny5_int$adj.se,
                    est_fit_tiny5bd.2sls_cov[,3],
                    BM_epsoc_tiny4_int$adj.se,
                    est_fit_tiny4bd.2sls_cov[,3]),
            p=list(NULL, est_fit_tiny8bd.2sls_cov[,5],
                   NULL, est_fit_tiny7bd.2sls_cov[,5],
                   NULL, est_fit_tiny6bd.2sls_cov[,5],
                   NULL, est_fit_tiny5bd.2sls_cov[,5],
                   NULL, est_fit_tiny4bd.2sls_cov[,5]),
            omit.stat=c("f", "ser", "adj.rsq", "rsq"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_epsoc_tiny8, digits=3), "",
                             round(p_two_sided_epsoc_tiny7, digits=3), "",
                             round(p_two_sided_epsoc_tiny6, digits=3), "",
                             round(p_two_sided_epsoc_tiny5, digits=3), "",
                             round(p_two_sided_epsoc_tiny4, digits=3), ""),
                           c("Age",
                             "[17.33-18.67]", "[17.33-18.67]",
                             "[17.42-18.58]", "[17.42-18.58]",
                             "[17.50-18.50]", "[17.50-18.50]",
                             "[17.58-18.42]", "[17.58-18.42]",
                             "[17.67-18.33]", "[17.67-18.33]"),
                           c("Method",
                             "OLS", "IV",
                              "OLS", "IV",
                             "OLS", "IV",
                             "OLS", "IV",
                             "OLS", "IV"),
                           c("Controls",
                             "yes", "yes",
                             "yes", "yes",
                             "yes", "yes",
                             "yes", "yes",
                             "yes", "yes")
          ),
          covariate.labels = c("Eligible", "Voting"),
          omit=c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "q47", "male", "q48", "votingp", "interestp", "schoolengage"),
          title=c("Effect of EP eligibility and voting on interest in politics across different bandwidths"),
          dep.var.caption=c("Dependent variable: political interest"),
          #dep.var.labels   = c("Eligibility", "Eligibility", "Participation", "Participation"),
          dep.var.labels.include = FALSE,
          header=FALSE, table.placement = "tb",  
          model.names = FALSE,
          font.size="footnotesize",
          notes.append = FALSE,
          notes = c("\\parbox{\\textwidth}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. Constant and country-fixed effects omitted from output. Bell-McCaffrey bias adjusted robust SE in parentheses. Inverse probability weights accounting for different probabilities of assignment to treatment and control conditions between country blocks. Entries of eligibility present ITT estimates, entries of voting present CACE estimates.}"),
          notes.label = "",
          notes.align = "l",
          single.row = TRUE,
          digits=2,
          column.sep.width = "-5pt",
          label = "tab:bandwidths", 
          type = "text", out="./tableB7.txt"  
          )







# ## ----supplementary-matching, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE----
# 
# # computationally intense
# 
# #### MATCHING
# 
# ## Matching: political interest ITT #
# 
# X  <- na_epsoc_q1.data %>% dplyr::select(highedu,
#                                          Q44B,
#                                          q45,
#                                          q47,
#                                          HIDIPLO,
#                                          male,
#                                          q48,
#                                          votingp,
#                                          interestp,
#                                          schoolengage,
#                                          country) # Define covariates
# Y  <- na_epsoc_q1.data$q1 # Define outcome/Response vector
# Tr  <- na_epsoc_q1.data$eligtreat # Define treatment vector
# 
# 
# 
# BalanceMat <- cbind(na_epsoc9_q1.data$Q44B,
#                     na_epsoc9_q1.data$votingp)
# 
# 
# gen1 <- GenMatch(Tr = Tr,
#                  X = X,
#                  BalanceMatrix = BalanceMat,
#                  estimand = "ATT",
#                  M = 1, # 1:1 matching
#                  pop.size=500, nboots = 500,
#                  max.generations=100)   
# 
# 
# mgens <- Match(Y=Y, Tr= Tr, X = X, estimand="ATT",
#                Weight.matrix = gen1)
# 
# 
# 
# mb <- MatchBalance(Tr ~ Q44B + votingp ,
#                    match.out = mgens, nboots = 500, data = na_epsoc9_q1.data)
# mgens$index.treated
# 
# 
# # Regression with matched data
# index.matched.units <- c(mgens$index.treated,mgens$index.control)
# data_epsoc_q1_matched <- na_epsoc_q1.data[index.matched.units,]
# 
# 
# 
# 
# 
# 
# ## Matching: political interest IV #
# 
# X  <- cace_epsoc9_q1.data %>% dplyr::select(highedu,
#                                          Q44B,
#                                          q45,
#                                          q47,
#                                          HIDIPLO,
#                                          male,
#                                          q48,
#                                          votingp,
#                                          interestp,
#                                          schoolengage) # Define covariates
# Y  <- cace_epsoc9_q1.data$q1 # Define outcome/Response vector
# Tr  <- cace_epsoc9_q1.data$eligtreat # Define treatment vector
# 
# 
# BalanceMat <- cbind(cace_epsoc9_q1.data$Q44B,
#                     cace_epsoc9_q1.data$votingp)
# 
# 
# gen1 <- GenMatch(Tr = Tr,
#                  X = X,
#                  BalanceMatrix = BalanceMat,
#                  estimand = "ATT",
#                  M = 1, # 1:1 matching
#                  pop.size=500, nboots = 500,
#                  max.generations=100)   
# 
# 
# mgens <- Match(Y=Y, Tr= Tr, X = X, estimand="ATT",
#                Weight.matrix = gen1)
# 
# 
# 
# mb <- MatchBalance(Tr ~ Q44B + votingp,
#                    match.out = mgens, nboots = 500, data = cace_epsoc9_q1.data)
# 
# 
# # Regression with matched data
# index.matched.units <- c(mgens$index.treated,mgens$index.control)
# data_cace_epsoc_q1_matched <- cace_epsoc9_q1.data[index.matched.units,]
# 
# 
# 
# 
# 
# 
# #write the data tables as to prevent that RMarkdown file takes hours to compile.
# 
# write.csv(data_epsoc_q1_matched, file="./data_epsoc_q1_matched.csv")
# write.csv(data_cace_epsoc_q1_matched, file="./data_cace_epsoc_q1_matched.csv")


## ----matching-analysis---------------------------------------------------

set.seed(64558030)

data_epsoc_q1_matched <- read.csv(file="./data_epsoc_q1_matched.csv")
data_cace_epsoc_q1_matched <- read.csv(file="./data_cace_epsoc_q1_matched.csv")


#create new ipws ITT estimate


data_epsoc_q1_matched$ipw <- NULL
data_epsoc_q1_matched$p_itblock <- NULL

#
#### weighting to account for different probabilities of treatment within strata
#
## IPW data_epsoc_q1_matched ###

blocks <- unique(data_epsoc_q1_matched$country[!is.na(data_epsoc_q1_matched$country)])  # number of blocks in the data
#
 p_itblock <- sapply(blocks, function(x) {
     length(data_epsoc_q1_matched$eligtreat[data_epsoc_q1_matched$eligtreat==1 & data_epsoc_q1_matched$country==x])/(length(data_epsoc_q1_matched$eligtreat[data_epsoc_q1_matched$country==x]))
   })
 # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
 p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
 colnames(p_itblock.data) <- c("country", "p_itblock")

data_epsoc_q1_matched <- right_join(data_epsoc_q1_matched, p_itblock.data)
 #calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
 data_epsoc_q1_matched$ipw <- (1/data_epsoc_q1_matched$p_itblock)*data_epsoc_q1_matched$eligtreat +
                     (1/(1-data_epsoc_q1_matched$p_itblock))*(1-data_epsoc_q1_matched$eligtreat)






# create new ipws: IV estimate

 data_cace_epsoc_q1_matched$ipw <- NULL
 data_cace_epsoc_q1_matched$p_itblock <- NULL
#
#
#### weighting to account for different probabilities of treatment within strata
#
## IPW data_cace_epsoc_q1_matched ###
 blocks <- unique(data_cace_epsoc_q1_matched$country[!is.na(data_cace_epsoc_q1_matched$country)])  # number of blocks in the data
#
 p_itblock <- sapply(blocks, function(x) {
     length(data_cace_epsoc_q1_matched$eligtreat[data_cace_epsoc_q1_matched$eligtreat==1 & data_cace_epsoc_q1_matched$country==x])/(length(data_cace_epsoc_q1_matched$eligtreat[data_cace_epsoc_q1_matched$country==x]))
   }) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
 p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
 colnames(p_itblock.data) <- c("country", "p_itblock")

data_cace_epsoc_q1_matched <- right_join(data_cace_epsoc_q1_matched, p_itblock.data)
 #calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
 data_cace_epsoc_q1_matched$ipw <- (1/data_cace_epsoc_q1_matched$p_itblock)*data_cace_epsoc_q1_matched$eligtreat +
                     (1/(1-data_cace_epsoc_q1_matched$p_itblock))*(1-data_cace_epsoc_q1_matched$eligtreat)






int_uni_epsoc_m <- lm(q1 ~ eligtreat + as.factor(country), data=data_epsoc_q1_matched, weights = ipw)
BM_epsoc_int_uni_m <- BMlmSE(int_uni_epsoc_m, clustervar = as.factor(data_epsoc_q1_matched$country))



int_lm_epsoc_m <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=data_epsoc_q1_matched, weights = ipw, clusters = country)
BM_epsoc_int_m <- BMlmSE(int_lm_epsoc_m, clustervar = as.factor(data_epsoc_q1_matched$country))




permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = data_epsoc_q1_matched$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}


#eligibility
#bivariate + countryFE
tmplm <- lm(q1 ~ eligtreat + as.factor(country), data=data_epsoc_q1_matched, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = data_epsoc_q1_matched$eligtreat, y = data_epsoc_q1_matched$q1))

## p-value blocks
p_two_sided_epsoc_m_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariates
tmplm <- lm(q1 ~ eligtreat +   
            highedu + Q44B + q45 + q47 + HIDIPLO +
            male +  +q48 + votingp + interestp + schoolengage + as.factor(country) , data=data_epsoc_q1_matched, weights = ipw, clusters = country)
est_itt <- coef(tmplm)[["eligtreat"]]




## p-value blocks
p_two_sided_epsoc_m <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))





## IV estimates

fit_m.2sls <- ivreg(q1 ~ q11 + as.factor(country)
                  | eligtreat + as.factor(country), data= data_cace_epsoc_q1_matched, weights=ipw)
est_fit_m.2sls <- commarobust_tidy(fit_m.2sls, clust_var = data_cace_epsoc_q1_matched$country)


fit_m.2sls_cov <- ivreg(q1 ~ q11 + highedu + Q44B + q45 + q47 + HIDIPLO +
                        male  +q48 + votingp + interestp + schoolengage + as.factor(country)
                    |  eligtreat + highedu + Q44B + q45 + q47 + HIDIPLO +
                        male  +q48 + votingp + interestp + schoolengage + as.factor(country),  data=data_cace_epsoc_q1_matched,  clusters=country, weights = ipw)
est_fit_m.2sls_cov <- commarobust_tidy(fit_m.2sls_cov, clust_var = data_cace_epsoc_q1_matched$country)



#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE B.8: Effect of first-time EP eligibility and voting on interest in politics (matched dataset) ###
#<!-- ------------------------------------------------------------------------------------------ -->


## ----polint_matched, message=FALSE, results="asis"-----------------------


polint_m_randominfp <- c(round(p_two_sided_epsoc_m_uni, digits=2), round(p_two_sided_epsoc_m, digits=2), NULL, NULL)


stargazer(int_uni_epsoc_m,
          int_lm_epsoc_m,
          fit_m.2sls,
          fit_m.2sls_cov,
          se=list(BM_epsoc_int_uni_m$adj.se,
                  BM_epsoc_int_m$adj.se,
                  est_fit_m.2sls[,3],
                  est_fit_m.2sls_cov[,3]),
          p = list(NULL,
                  NULL,
                  est_fit_m.2sls[,5],
                  est_fit_m.2sls_cov[,5]),
              omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          dep.var.caption=c("Dependent variable: political interest"),
          #dep.var.labels   = c("OLS", "OLS", "CACE", "CACE"),
          dep.var.labels.include = FALSE,
          covariate.labels = c("Eligible", "Voting"),
           add.lines = list(
                               c(
                              "Random. Inf. (p-value)",
                              round(p_two_sided_epsoc_m_uni, digits=3),
                              round(p_two_sided_epsoc_m, digits=3),
                              NULL,
                              NULL),
                           c("Age", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]"),
                           c("Method", "OLS", "OLS", "IV", "IV"),
                           c("Controls", "no", "yes", "no", "yes")),
          #, "Education", "Household with Parents", "Standard of Living", "Religious", "Higher Education Parents", "Gender", "Urban-Rural", "Voting Habits Parents", "Political Interest Parents", "Civic Engagement in School"),
          title=c("Effect of first-time EP eligibility and voting on interest in politics (matched dataset)"),
          #order = c(1, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
          header=FALSE, 
          notes.append = FALSE,
          model.names = FALSE,
         notes.align = "l",
          notes = c("\\parbox{0.8\\textwidth}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. Constant and country fixed-effects omitted from output. Model 3 and 4 show the causal average complier effect (CACE) from using the eligibility as instrument. Bell-McCaffrey bias adjusted robust SE in parentheses. Inverse probability weights accounting for different probabilities of assignment to treatment and control conditions between country blocks.}"),
          notes.label = "",
          label = "tab:polintmatched",
          font.size="small",
          single.row = TRUE,
          digits=2,
          #align=TRUE,
          column.sep.width = "5pt", 
         type="text", out="./tableB8.txt"
                    )




## ----lm-elig-partisan-extended, warning=FALSE, echo=FALSE, results='asis', message=FALSE, include=FALSE----

set.seed(64558030)



## Effects on Partisan Preferences

#nongov parties  
colvector <- c("country", "eligtreat", "prop_nongov", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_nongov <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_nongov$q11[na_epsoc_lm_nongov$eligtreat==0] <- 0

na_epsoc_lm_nongov <- na.omit(na_epsoc_lm_nongov)
names(na_epsoc_lm_nongov) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_nongov$country[!is.na(na_epsoc_lm_nongov$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc_lm_nongov$eligtreat[na_epsoc_lm_nongov$eligtreat==1 & na_epsoc_lm_nongov$country==x])/(length(na_epsoc_lm_nongov$eligtreat[na_epsoc_lm_nongov$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment Elig 2004", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc_lm_nongov <- right_join(na_epsoc_lm_nongov, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_nongov$ipw <- (1/na_epsoc_lm_nongov$p_itblock)*na_epsoc_lm_nongov$eligtreat +
  (1/(1-na_epsoc_lm_nongov$p_itblock))*(1-na_epsoc_lm_nongov$eligtreat)


epsoc.data_lm_nongov_uni <- lm(prop_nongov ~ eligtreat + as.factor(country),  
                            data = na_epsoc_lm_nongov, weights=ipw, clusters= country)
BM_epsoc_nongov_uni <- BMlmSE(epsoc.data_lm_nongov_uni, clustervar = as.factor(na_epsoc_lm_nongov$country))

epsoc.data_lm_nongov <- lm(prop_nongov ~ eligtreat +   
                          highedu + Q44B + q45 + q47 + HIDIPLO +
                          male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
                        data = na_epsoc_lm_nongov, weights=ipw, clusters = country)
BM_epsoc_nongov <- BMlmSE(epsoc.data_lm_nongov, clustervar = as.factor(na_epsoc_lm_nongov$country))


#randomisation inference prop_nongov
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_nongov$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_nongov ~ eligtreat + as.factor(country), data=na_epsoc_lm_nongov, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_nongov$eligtreat, y = na_epsoc_lm_nongov$prop_nongov))

## p-value blocks
p_two_sided_prop_nongov_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_nongov ~ eligtreat +   
              highedu + Q44B + q45 + q47 + HIDIPLO +
              male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_nongov, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_nongov$eligtreat, y = na_epsoc_lm_nongov$prop_nongov))

## p-value blocks
p_two_sided_prop_nongov <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))







## Effects on Partisan Preferences

#sm2 parties  
colvector <- c("country", "eligtreat", "prop_sm2", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_sm2 <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_sm2$q11[na_epsoc_lm_sm2$eligtreat==0] <- 0

na_epsoc_lm_sm2 <- na.omit(na_epsoc_lm_sm2)
names(na_epsoc_lm_sm2) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_sm2$country[!is.na(na_epsoc_lm_sm2$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc_lm_sm2$eligtreat[na_epsoc_lm_sm2$eligtreat==1 & na_epsoc_lm_sm2$country==x])/(length(na_epsoc_lm_sm2$eligtreat[na_epsoc_lm_sm2$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment Elig 2004", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc_lm_sm2 <- right_join(na_epsoc_lm_sm2, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_sm2$ipw <- (1/na_epsoc_lm_sm2$p_itblock)*na_epsoc_lm_sm2$eligtreat +
  (1/(1-na_epsoc_lm_sm2$p_itblock))*(1-na_epsoc_lm_sm2$eligtreat)


epsoc.data_lm_sm2_uni <- lm(prop_sm2 ~ eligtreat + as.factor(country),  
                            data = na_epsoc_lm_sm2, weights=ipw, clusters= country)
BM_epsoc_sm2_uni <- BMlmSE(epsoc.data_lm_sm2_uni, clustervar = as.factor(na_epsoc_lm_sm2$country))

epsoc.data_lm_sm2 <- lm(prop_sm2 ~ eligtreat +   
                          highedu + Q44B + q45 + q47 + HIDIPLO +
                          male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
                        data = na_epsoc_lm_sm2, weights=ipw, clusters = country)
BM_epsoc_sm2 <- BMlmSE(epsoc.data_lm_sm2, clustervar = as.factor(na_epsoc_lm_sm2$country))


#randomisation inference prop_sm2
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_sm2$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_sm2 ~ eligtreat + as.factor(country), data=na_epsoc_lm_sm2, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_sm2$eligtreat, y = na_epsoc_lm_sm2$prop_sm2))

## p-value blocks
p_two_sided_prop_sm2_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_sm2 ~ eligtreat +   
              highedu + Q44B + q45 + q47 + HIDIPLO +
              male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_sm2, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_sm2$eligtreat, y = na_epsoc_lm_sm2$prop_sm2))

## p-value blocks
p_two_sided_prop_sm2 <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))






## Effects on Partisan Preferences

#sm1 parties  
colvector <- c("country", "eligtreat", "prop_sm1", "highedu", "Q44B", "q45", "q47", "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage", "ID", "q11")
na_epsoc_lm_sm1 <- epsoc.data %>% select_(.dots = colvector)
na_epsoc_lm_sm1$q11[na_epsoc_lm_sm1$eligtreat==0] <- 0

na_epsoc_lm_sm1 <- na.omit(na_epsoc_lm_sm1)
names(na_epsoc_lm_sm1) <- colvector

# weighting to account for different probabilities of treatment within strata ####

### IPW na_epsoc_q1.data ###
blocks <- unique(na_epsoc_lm_sm1$country[!is.na(na_epsoc_lm_sm1$country)])  # number of blocks in the data

p_itblock <- sapply(blocks, function(x) {
  length(na_epsoc_lm_sm1$eligtreat[na_epsoc_lm_sm1$eligtreat==1 & na_epsoc_lm_sm1$country==x])/(length(na_epsoc_lm_sm1$eligtreat[na_epsoc_lm_sm1$country==x]))
}) # probability of treatment assignment within each block, i.e. N_treated/(N_treated+N_control)
p_itblock.data <- as.data.frame(cbind(blocks, p_itblock))
colnames(p_itblock.data) <- c("country", "p_itblock")
#kable(p_itblock.data, digits=2, caption = "Probability of Treatment Assingment Elig 2004", col.names = c("country", "P(D=1)_{ij}"))

na_epsoc_lm_sm1 <- right_join(na_epsoc_lm_sm1, p_itblock.data)
#calculate the inverse probability weight, multiplying the treated units by the p to be treated, and the controls by the p to be control
na_epsoc_lm_sm1$ipw <- (1/na_epsoc_lm_sm1$p_itblock)*na_epsoc_lm_sm1$eligtreat +
  (1/(1-na_epsoc_lm_sm1$p_itblock))*(1-na_epsoc_lm_sm1$eligtreat)


epsoc.data_lm_sm1_uni <- lm(prop_sm1 ~ eligtreat + as.factor(country),  
                            data = na_epsoc_lm_sm1, weights=ipw, clusters= country)
BM_epsoc_sm1_uni <- BMlmSE(epsoc.data_lm_sm1_uni, clustervar = as.factor(na_epsoc_lm_sm1$country))

epsoc.data_lm_sm1 <- lm(prop_sm1 ~ eligtreat +   
                          highedu + Q44B + q45 + q47 + HIDIPLO +
                          male +  +q48 + votingp + interestp + schoolengage + as.factor(country) ,
                        data = na_epsoc_lm_sm1, weights=ipw, clusters = country)
BM_epsoc_sm1 <- BMlmSE(epsoc.data_lm_sm1, clustervar = as.factor(na_epsoc_lm_sm1$country))


#randomisation inference prop_sm1
permute_block_sharp_null <- function(z, y) {
  Z = randomizr::block_ra(blocks = na_epsoc_lm_sm1$country)
  est_itt_null = coef(lm(y ~ Z))[["Z"]]
  return(est_itt_null)
}

#bivariate + countryFE
tmplm <- lm(prop_sm1 ~ eligtreat + as.factor(country), data=na_epsoc_lm_sm1, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_sm1$eligtreat, y = na_epsoc_lm_sm1$prop_sm1))

## p-value blocks
p_two_sided_prop_sm1_uni <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))


#covariate
tmplm <- lm(prop_sm1 ~ eligtreat +   
              highedu + Q44B + q45 + q47 + HIDIPLO +
              male +  +q48 + votingp + interestp + schoolengage + as.factor(country), data=na_epsoc_lm_sm1, weights = ipw, clusters = country)
est_itt <- coefficients(tmplm)[["eligtreat"]]

rand_dist_block <- replicate(10^3, permute_block_sharp_null(z = na_epsoc_lm_sm1$eligtreat, y = na_epsoc_lm_sm1$prop_sm1))

## p-value blocks
p_two_sided_prop_sm1 <- 2* min(mean(rand_dist_block >= est_itt), mean(rand_dist_block <= est_itt))





## ----iv-cace-partisan-other-challengers, echo=FALSE, warning=FALSE, message=FALSE, results="asis", include=FALSE----


### cace outcome non-gov partisan ties ###

cace_epsoc9_nongov.data <- na_epsoc_lm_nongov



fit.2sls_nongov <- ivreg(prop_nongov ~ q11 + as.factor(country)
                         | eligtreat + as.factor(country),
                         data=cace_epsoc9_nongov.data, weights = ipw)
est_fit.2sls_nongov <- commarobust_tidy(fit.2sls_nongov, clust_var = cace_epsoc9_nongov.data$country)



fit.2sls_nongov_cov <- ivreg(prop_nongov ~ q11 +
                               + highedu + Q44B + q45 + q47 + HIDIPLO +
                               male  +q48 + votingp + interestp + schoolengage +
                               as.factor(country)
                             | eligtreat + as.factor(country) +
                               highedu + Q44B + q45 + q47 + HIDIPLO +
                               male  +q48 + votingp + interestp + schoolengage +
                               as.factor(country),  data=cace_epsoc9_nongov.data, weights = ipw)
est_fit.2sls_nongov_cov <- commarobust_tidy(fit.2sls_nongov_cov, clust_var = cace_epsoc9_nongov.data$country)




### cace outcome small1 partisan ties ###

cace_epsoc9_sm1.data <- na_epsoc_lm_sm1


fit.2sls_sm1 <- ivreg(prop_sm1 ~ q11 + as.factor(country)
                         | eligtreat + as.factor(country),
                         data=cace_epsoc9_sm1.data, weights = ipw)
est_fit.2sls_sm1 <- commarobust_tidy(fit.2sls_sm1, clust_var = cace_epsoc9_sm1.data$country)



fit.2sls_sm1_cov <- ivreg(prop_sm1 ~ q11 +
                               + highedu + Q44B + q45 + q47 + HIDIPLO +
                               male  +q48 + votingp + interestp + schoolengage +
                               as.factor(country)
                             | eligtreat + as.factor(country) +
                               highedu + Q44B + q45 + q47 + HIDIPLO +
                               male  +q48 + votingp + interestp + schoolengage +
                               as.factor(country),  data=cace_epsoc9_sm1.data, weights = ipw)
est_fit.2sls_sm1_cov <- commarobust_tidy(fit.2sls_sm1_cov, clust_var = cace_epsoc9_sm1.data$country)




### cace outcome small2 partisan ties ###

cace_epsoc9_sm2.data <- na_epsoc_lm_sm2


fit.2sls_sm2 <- ivreg(prop_sm2 ~ q11 + as.factor(country)
                         | eligtreat + as.factor(country),
                         data=cace_epsoc9_sm2.data, weights = ipw)
est_fit.2sls_sm2 <- commarobust_tidy(fit.2sls_sm2, clust_var = cace_epsoc9_sm2.data$country)



fit.2sls_sm2_cov <- ivreg(prop_sm2 ~ q11 +
                               + highedu + Q44B + q45 + q47 + HIDIPLO +
                               male  +q48 + votingp + interestp + schoolengage +
                               as.factor(country)
                             | eligtreat + as.factor(country) +
                               highedu + Q44B + q45 + q47 + HIDIPLO +
                               male  +q48 + votingp + interestp + schoolengage +
                               as.factor(country),  data=cace_epsoc9_sm2.data, weights = ipw)
est_fit.2sls_sm2_cov <- commarobust_tidy(fit.2sls_sm2_cov, clust_var = cace_epsoc9_sm2.data$country)








## ----table-appendix-robust-classification, results="asis", include=FALSE----

#<!-- ------------------------------------------------------------------------------------------ -->
### TABLE B.9: Effect of first-time EP eligibility and voting on partisan ties to challenger parties 
#             (alternative classification of challenger parties) ###
#<!-- ------------------------------------------------------------------------------------------ -->


stargazer(epsoc.data_lm_nongov,
          epsoc.data_lm_sm1,
          epsoc.data_lm_sm2,
          se=list(BM_epsoc_nongov$adj.se,
                  BM_epsoc_sm1$adj.se,
                  BM_epsoc_sm2$adj.se),
          single.row = TRUE,
          digits=2,
          omit.stat=c("f", "ser"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_prop_nongov, digits=3),
                             round(p_two_sided_prop_sm1, digits=3),
                             round(p_two_sided_prop_sm2, digits=3))),
          dep.var.caption=c("Dependent variable: closeness to challenger parties"),
          dep.var.labels   = c("Non-Government Parties", "Small Parties I", "Small Parties II"),
          covariate.labels = c("Eligible"),
          #, "Education", "Household with P'", "Standard of Living",
          #                     "Religious", "Higher Edu. Parents", "Gender",
          # "Urban-Rural", "Voting Habits Parents", "Pol. Interest Parents"),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47",
                   "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          title=c("Effect of first-time EP eligibility and voting on partisan ties to challenger parties (alternative classification of challenger parties)"),
          header=FALSE, notes=NULL,
          font.size="small",
          label="tab:epsoc.prop", 
          type = "text", out="./tableB9_1.txt")

stargazer(fit.2sls_nongov_cov,
          fit.2sls_sm1_cov,
          fit.2sls_sm2_cov,
          se = list(est_fit.2sls_nongov_cov[,3],
                    est_fit.2sls_sm1_cov[,3],
                    est_fit.2sls_sm2_cov[,3]),
          p = list(est_fit.2sls_nongov_cov[,5],
                   est_fit.2sls_sm1_cov[,5],
                   est_fit.2sls_sm2_cov[,5]),
          omit = c("Constant", "country", "highedu", "Q44B", "q45", "q47",
                   "HIDIPLO", "male", "q48", "votingp", "interestp", "schoolengage"),
          omit.stat=c("f", "ser", "rsq", "adj.rsq"),
          dep.var.caption=c("Dependent variable: closeness to challenger parties"),
          dep.var.labels   = c("Non-Government Parties", "Small Parties I", "Small Parties II"),
          covariate.labels = c("Voting"),
          add.lines = list(c("Random. Inf. (p-value)",
                             round(p_two_sided_prop_nongov, digits=3),
                             round(p_two_sided_prop_sm1, digits=3),
                             round(p_two_sided_prop_sm2, digits=3)),
                           c("Age", "[17.25-18.75]", "[17.25-18.75]", "[17.25-18.75]",
                             "[17.25-18.75]"),
                           c("Controls", "yes", "yes", "yes", "yes")),
          title=c("Effect of first-time EP eligibility and voting on partisan ties to challenger parties (alternative classification of challenger parties)"),
          header=FALSE, notes=NULL,
          font.size="small",
          label = c("tab:ivpartisan"),
          single.row = TRUE,
          digits=2, 
          type = "text", out="./tableB9_2.txt") 



